def lambda_handler(event, context):
    # Parse the triggering S3 event; keys are expected to look like <image_prefix><batch>/illum/<plate>/<file>
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['wells_with_all_cycles']
    num_series = int(metadata['barcoding_rows']) * int(
        metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata['barcoding_cycles'])
    platelist = image_dict.keys()

    pipe_name = pipeline_name
    if metadata["fast_or_slow_mode"] == 'fast':
        if 'fast' not in pipe_name:
            pipe_name = pipe_name[:-7] + '_fast.cppipe'
    else:
        if 'slow' not in pipe_name:
            pipe_name = pipe_name[:-7] + '_slow.cppipe'
    print(pipe_name)

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/illum'
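    # Expect 5 illum files per plate per barcoding cycle; the factor of 5 presumably reflects one illumination function per channel (DAPI plus the four bases)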
    expected_len = int(metadata['barcoding_cycles']) * len(platelist) * 5

    done = helpful_functions.check_if_run_done(s3,
                                               bucket_name,
                                               filter_prefix,
                                               expected_len,
                                               current_app_name,
                                               prev_step_app_name,
                                               sqs,
                                               duplicate_queue_name,
                                               filter_in='Cycle')

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        #First things first, let's make an easier-to-use plate and well list and save it
        plate_and_well_list = []
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict['1'].keys()
            for eachwell in well_list:
                plate_and_well_list.append((eachplate, eachwell))
        metadata['barcoding_plate_and_well_list'] = plate_and_well_list
        helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                              metadata_file_name,
                                              metadata_on_bucket_name)
        #Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images/' + eachplate
            illum_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/illum/' + eachplate
            per_plate_csv = create_CSVs.create_CSV_pipeline6(
                eachplate, num_series, expected_cycles, bucket_folder,
                illum_folder, platedict, metadata['one_or_many_files'],
                metadata["fast_or_slow_mode"])
            csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline6.csv'
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        #make the jobs
        create_batch_jobs.create_batch_jobs_6(image_prefix, batch, pipe_name,
                                              plate_and_well_list, app_name,
                                              metadata['one_or_many_files'],
                                              num_series)

        #Start a cluster
        if metadata['one_or_many_files'] == 'one':
            njobs = len(plate_and_well_list) * 19
        else:
            njobs = len(plate_and_well_list) * num_series
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            njobs)

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
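
# ----------------------------------------------------------------------------
# A minimal sketch (not part of the original source) of the module-level setup
# these handlers appear to assume. Only the names are taken from the calls in
# the handlers themselves; every value below is a made-up placeholder.
import ast
import os

import boto3
import numpy
import numpy as np  # some handlers reference numpy, others the np alias

import helpful_functions   # project helper modules assumed to ship with each Lambda
import create_CSVs
import create_batch_jobs
import run_DCP

s3 = boto3.client("s3")

pipeline_name = "placeholder.cppipe"         # placeholder
metadata_file_name = "/tmp/metadata.json"    # placeholder local download path
fleet_file_name = "placeholderFleet.json"    # placeholder
step = "placeholder_step"                    # placeholder
current_app_name = "placeholder_current"     # placeholder
prev_step_app_name = "placeholder_previous"  # placeholder
prev_step_num = "placeholder_num"            # placeholder
duplicate_queue_name = "placeholder_queue"   # placeholder
range_skip = 1                               # placeholder; referenced by some handlers
upper_percentile = 90                        # placeholder; referenced by the threshold-editing handler
lower_percentile = 10                        # placeholder
tileperside = 1                              # placeholder
final_tile_size = 1                          # placeholder
# ----------------------------------------------------------------------------
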
def lambda_handler(event, context):
    # Parse the triggering S3 event; the key is expected to be the uploaded pipeline, e.g. <image_prefix>workspace/pipelines/<batch>/<pipeline_name>
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    prefix, batchAndPipe = key.split('pipelines/')
    image_prefix = prefix.split('workspace')[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    #Get the list of images in this experiment
    image_list_prefix = image_prefix + batch + '/images/'
    image_list = helpful_functions.paginate_a_folder(s3, bucket,
                                                     image_list_prefix)
    image_dict = helpful_functions.parse_image_names(image_list,
                                                     filter_in='20X',
                                                     filter_out='copy')
    metadata['painting_file_data'] = image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

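    # In 'one' mode each well is expected to arrive as a single multi-site file; otherwise one file per site is expected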
    if metadata['one_or_many_files'] == 'one':
        full_well_files = 1
    else:
        full_well_files = num_series

    #Pull the file names we care about, and make the CSV
    platelist = image_dict.keys()
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        paint_cycle_name = list(platedict[well_list[0]].keys())[0]
        per_well_im_list = []
        for eachwell in well_list:
            per_well = platedict[eachwell][paint_cycle_name]
            per_well.sort()
            if len(per_well) == full_well_files:
                per_well_im_list.append(per_well)
        bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images/' + eachplate + '/' + paint_cycle_name
        per_plate_csv = create_CSVs.create_CSV_pipeline1(
            eachplate, num_series, bucket_folder, per_well_im_list,
            metadata['one_or_many_files'])
        csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline1.csv'
        with open(per_plate_csv, 'rb') as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    #Now it's time to run DCP
    #Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)
    #run_DCP.grab_batch_config(bucket,prefix,batch,step)

    #Make a batch
    create_batch_jobs.create_batch_jobs_1(image_prefix, batch, pipeline_name,
                                          platelist, app_name)

    #Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist))

    #Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print('Go run the monitor now')
def lambda_handler(event, context):
    # Parse the triggering S3 event (the key is the uploaded pipeline file)
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]

    prefix, batchAndPipe = key.split("pipelines/")
    image_prefix = prefix.split("workspace")[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata["barcoding_rows"]) * int(
        metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata["barcoding_cycles"])

    # Get the list of images in this experiment - this can take a long time for big experiments so let's add some prints
    print("Getting the list of images")
    image_list_prefix = (
        image_prefix + batch + "/images/"
    )  # the slash here is critical, because we don't want to read images_corrected because it's huge
    image_list = helpful_functions.paginate_a_folder(s3, bucket,
                                                     image_list_prefix)
    print("Image list retrieved")
    image_dict = helpful_functions.parse_image_names(image_list,
                                                     filter_in="10X",
                                                     filter_out="copy")
    metadata["barcoding_file_data"] = image_dict
    print("Parsing the image list")
    # We've saved the previous for looking at/debugging later, but really all we want is the ones with all cycles
    if metadata["one_or_many_files"] == 1:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict, expected_cycles, metadata["one_or_many_files"])
    else:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict,
            expected_cycles,
            metadata["one_or_many_files"],
            files_per_well=num_series,
        )
    metadata["wells_with_all_cycles"] = parsed_image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    # Pull the file names we care about, and make the CSV
    print("Making the CSVs")
    platelist = list(image_dict.keys())
    for eachplate in platelist:
        platedict = parsed_image_dict[eachplate]
        well_list = list(platedict.keys())
        bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                         "/images/" + eachplate)
        per_plate_csv = create_CSVs.create_CSV_pipeline5(
            eachplate,
            num_series,
            expected_cycles,
            bucket_folder,
            platedict,
            metadata["one_or_many_files"],
            metadata["fast_or_slow_mode"],
        )
        csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                              eachplate + "/load_data_pipeline5.csv")
        with open(per_plate_csv, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    # Now it's time to run DCP
    # Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)
    # run_DCP.grab_batch_config(bucket,prefix,batch,step)

    # Make a batch
    create_batch_jobs.create_batch_jobs_5(image_prefix, batch, pipeline_name,
                                          platelist, expected_cycles, app_name)

    # Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist) * expected_cycles)

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print("Go run the monitor now")
def lambda_handler(event, context):
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    if "csv" in key:
        plate = key.split("/")[-2].split("-")[0]
        batch = key.split("/")[-5]
        image_prefix = key.split(batch)[0]

    else:
        batch = key.split("/")[-2]
        image_prefix = key.split("workspace")[0]

    prefix = os.path.join(image_prefix, "workspace/")
    print(batch, prefix)

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["painting_file_data"]
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    out_range = list(range(0, num_series, range_skip))
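    # The +6 below presumably accounts for per-well non-image outputs (e.g. CSVs) written alongside the corrected images (assumption)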
    expected_files_per_well = (num_series *
                               int(metadata["painting_channels"])) + 6
    platelist = list(image_dict.keys())
    plate_and_well_list = []
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        for eachwell in well_list:
            plate_and_well_list.append((eachplate, eachwell))
    metadata["painting_plate_and_well_list"] = plate_and_well_list
    helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/images_corrected/painting"
    # Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = (
        (len(plate_and_well_list) - 1) * expected_files_per_well) + 1

    print("Checking if all files are present")
    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        print("Checking CSVs for thresholds")
        image_csv_list = helpful_functions.paginate_a_folder(
            s3,
            bucket_name,
            os.path.join(image_prefix, batch, "images_corrected/painting"),
        )
        image_csv_list = [x for x in image_csv_list if "Image.csv" in x]
        image_df = helpful_functions.concat_some_csvs(s3, bucket_name,
                                                      image_csv_list,
                                                      "Image.csv")
        threshes = image_df["Threshold_FinalThreshold_Cells"]
        calc_upper_percentile = numpy.percentile(threshes, upper_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {upper_percentile} percentile was",
            calc_upper_percentile,
        )
        calc_lower_percentile = numpy.percentile(threshes, lower_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {lower_percentile} percentile was",
            calc_lower_percentile,
        )

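        # Download the pipeline, let edit_id_secondary (defined elsewhere in this module)
        # rewrite its secondary-object threshold bounds using the percentiles computed
        # above, then overwrite the pipeline on the bucket with the edited copy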
        pipeline_on_bucket_name = os.path.join(prefix, "pipelines", batch,
                                               pipeline_name)
        local_pipeline_name = os.path.join("/tmp", pipeline_name)
        local_temp_pipeline_name = os.path.join(
            "/tmp",
            pipeline_name.split(".")[0] + "_edited.cppipe")
        with open(local_pipeline_name, "wb") as f:
            s3.download_fileobj(bucket_name, pipeline_on_bucket_name, f)
        edit_id_secondary(local_pipeline_name, local_temp_pipeline_name,
                          calc_lower_percentile, calc_upper_percentile)
        with open(local_temp_pipeline_name, "rb") as pipeline:
            s3.put_object(Body=pipeline,
                          Bucket=bucket_name,
                          Key=pipeline_on_bucket_name)
        print("Edited pipeline file")

        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict.keys())
            bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                             "/images_corrected/painting")
            per_plate_csv = create_CSVs.create_CSV_pipeline3(
                eachplate, num_series, bucket_folder, well_list, range_skip)
            csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                                  eachplate + "/load_data_pipeline3.csv")
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_3(image_prefix, batch,
                                              pipeline_name,
                                              plate_and_well_list, out_range,
                                              app_name)

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(plate_and_well_list) * len(out_range),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
def lambda_handler(event, context):
    # Parse the triggering S3 event (the key is the uploaded pipeline file)
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    prefix, batchAndPipe = key.split('pipelines/')
    image_prefix = prefix.split('workspace')[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix,'metadata',batch,'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata['barcoding_rows']) * int(metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata['barcoding_cycles'])

    #Get the list of images in this experiment - this can take a long time for big experiments so let's add some prints
    print('Getting the list of images')
    image_list_prefix = image_prefix+batch+'/images/' #the slash here is critical, because we don't want to read images_corrected because it's huge
    image_list = helpful_functions.paginate_a_folder(s3,bucket,image_list_prefix)
    print('Image list retrieved')
    image_dict = helpful_functions.parse_image_names(image_list, filter_in='10X', filter_out='copy')
    metadata['barcoding_file_data'] = image_dict
    print('Parsing the image list')
    #We've saved the previous for looking at/debugging later, but really all we want is the ones with all cycles
    if metadata['one_or_many_files'] == 'one':
        parsed_image_dict = helpful_functions.return_full_wells(image_dict,expected_cycles, metadata['one_or_many_files'])
    else:
        parsed_image_dict = helpful_functions.return_full_wells(image_dict,expected_cycles, metadata['one_or_many_files'], files_per_well=num_series)
    metadata['wells_with_all_cycles'] = parsed_image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata, metadata_file_name, metadata_on_bucket_name)

    #Pull the file names we care about, and make the CSV
    print('Making the CSVs')
    platelist = image_dict.keys()
    for eachplate in platelist:
        platedict = parsed_image_dict[eachplate]
        well_list = platedict.keys()
        bucket_folder = '/home/ubuntu/bucket/'+image_prefix+batch+'/images/'+eachplate
        per_plate_csv = create_CSVs.create_CSV_pipeline5(eachplate, num_series, expected_cycles, bucket_folder, platedict, metadata['one_or_many_files'], metadata["fast_or_slow_mode"])
        csv_on_bucket_name = prefix + 'load_data_csv/'+batch+'/'+eachplate+'/load_data_pipeline5.csv'
        with open(per_plate_csv,'rb') as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    #Now it's time to run DCP
    #Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket,prefix,batch,step)
    #run_DCP.grab_batch_config(bucket,prefix,batch,step)

    #Make a batch
    create_batch_jobs.create_batch_jobs_5(image_prefix,batch,pipeline_name,platelist, expected_cycles, app_name)

    #Start a cluster
    run_DCP.run_cluster(bucket,prefix,batch,step, fleet_file_name, len(platelist)*expected_cycles)

    #Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch,step)
    print('Go run the monitor now')
def lambda_handler(event, context):
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    prefix, batchAndPipe = key.split("pipelines/")
    image_prefix = prefix.split("workspace")[0]
    batch = batchAndPipe.split("1_")[0][:-1]

    # Get the metadata file
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    # Standard vs. SABER configs
    if "Channeldict" not in list(metadata.keys()):
        print("Update your metadata.json to include Channeldict")
        return "Update your metadata.json to include Channeldict"
    Channeldict = ast.literal_eval(metadata["Channeldict"])
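    # Channeldict's keys are acquisition rounds; more than one round implies a SABER experiment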
    if len(Channeldict.keys()) == 1:
        SABER = False
        print("Not a SABER experiment")
    if len(Channeldict.keys()) > 1:
        SABER = True
        print("SABER experiment")

    # Calculate number of images from rows and columns in metadata
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    # Overwrite the rows x columns series count if images-per-well is set in the metadata
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Get the list of images in this experiment
    if not SABER:
        parse_name_filter = "20X_CP_"
    if SABER:
        parse_name_filter = ""
    image_list_prefix = image_prefix + batch + "/images/"
    image_list = helpful_functions.paginate_a_folder(s3, bucket,
                                                     image_list_prefix)
    image_dict = helpful_functions.parse_image_names(
        image_list, filter_in=parse_name_filter, filter_out="copy")
    metadata["painting_file_data"] = image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    # Number of files per well that indicates the well has all images present
    if metadata["one_or_many_files"] == "one":
        full_well_files = 1
    else:
        full_well_files = num_series

    # Pull the file names we care about, and make the CSV
    platelist = list(image_dict.keys())
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        Channelrounds = list(Channeldict.keys())
        # Only keep full wells
        print(
            f"Expecting {full_well_files} files per well per round for {eachplate}"
        )
        incomplete_wells = []
        for eachwell in well_list:
            for eachround in Channelrounds:
                per_well = platedict[eachwell][eachround]
                if len(per_well) != full_well_files:
                    incomplete_wells.append(eachwell)
                    print(
                        f"{eachwell} {eachround} doesn't have full well files. {len(per_well)} files found."
                    )
        if incomplete_wells:
            # a well may appear once per incomplete round, so deduplicate before deleting
            for well in set(incomplete_wells):
                del platedict[well]
        bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                         "/images/" + eachplate + "/")
        illum_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                        "/illum/" + eachplate)
        per_plate_csv, per_plate_csv_2 = create_CSVs.create_CSV_pipeline1(
            eachplate,
            num_series,
            bucket_folder,
            illum_folder,
            platedict,
            metadata["one_or_many_files"],
            metadata["Channeldict"],
        )
        csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                              eachplate + "/load_data_pipeline1.csv")
        csv_on_bucket_name_2 = (prefix + "load_data_csv/" + batch + "/" +
                                eachplate + "/load_data_pipeline2.csv")
        with open(per_plate_csv, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)
        with open(per_plate_csv_2, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name_2)

    # Now it's time to run DCP
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)

    # Make a batch
    if not SABER:
        pipeline_name = "1_CP_Illum.cppipe"
    if SABER:
        pipeline_name = "1_SABER_CP_Illum.cppipe"
    create_batch_jobs.create_batch_jobs_1(image_prefix, batch, pipeline_name,
                                          platelist, app_name)

    # Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist))

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print("Go run the monitor now")
def lambda_handler(event, context):
    # Parse the triggering S3 event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2]
    batch = key.split("/")[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch, "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name
    )

    image_dict = metadata["wells_with_all_cycles"]
    num_series = int(metadata["barcoding_rows"]) * int(metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata["barcoding_cycles"])
    platelist = list(image_dict.keys())

    # Default pipeline is slow. If images acquired in fast mode, pulls alternate pipeline.
    pipe_name = pipeline_name
    if metadata["fast_or_slow_mode"] == "fast":
        if "fast" not in pipe_name:
            pipe_name = pipe_name[:-7] + "_fast.cppipe"
    print(f"Pipeline name is {pipe_name}")

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/illum"
    expected_len = int(metadata["barcoding_cycles"]) * len(platelist) * 5

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
        filter_in="Cycle",
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        # First things first, let's make an easier-to-use plate and well list and save it
        plate_and_well_list = []
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict["1"].keys())
            for eachwell in well_list:
                plate_and_well_list.append((eachplate, eachwell))
        metadata["barcoding_plate_and_well_list"] = plate_and_well_list
        helpful_functions.write_metadata_file(
            s3, bucket_name, metadata, metadata_file_name, metadata_on_bucket_name
        )
        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            bucket_folder = (
                "/home/ubuntu/bucket/" + image_prefix + batch + "/images/" + eachplate
            )
            illum_folder = (
                "/home/ubuntu/bucket/" + image_prefix + batch + "/illum/" + eachplate
            )
            per_plate_csv = create_CSVs.create_CSV_pipeline6(
                eachplate,
                num_series,
                expected_cycles,
                bucket_folder,
                illum_folder,
                platedict,
                metadata["one_or_many_files"],
                metadata["fast_or_slow_mode"],
            )
            csv_on_bucket_name = (
                prefix
                + "load_data_csv/"
                + batch
                + "/"
                + eachplate
                + "/load_data_pipeline6.csv"
            )
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_6(
            image_prefix,
            batch,
            pipe_name,
            plate_and_well_list,
            app_name,
            metadata["one_or_many_files"],
            num_series,
        )

        # Start a cluster
        if metadata["one_or_many_files"] == "one":
            njobs = len(plate_and_well_list) * 19
        else:
            njobs = len(plate_and_well_list) * num_series
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name, njobs)

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
def lambda_handler(event, context):
    # Parse the triggering S3 event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2].split('-')[0]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    print(plate, batch, image_prefix, prefix)

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    expected_files_per_well = np.ceil(float(num_series) / range_skip)
    platelist = image_dict.keys()
    plate_and_well_list = []
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = platedict.keys()
        for eachwell in well_list:
            plate_and_well_list.append((eachplate, eachwell))
    metadata['painting_plate_and_well_list'] = plate_and_well_list
    helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/images_corrected/painting'
    #Because this step is batched per site (not well), we don't need to anticipate partial loading of jobs
    expected_len = (len(plate_and_well_list) *
                    expected_files_per_well) + (6 * (len(platelist)))

    done = helpful_functions.check_if_run_done(s3, bucket_name, filter_prefix,
                                               expected_len, current_app_name,
                                               prev_step_app_name, sqs,
                                               duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name,
                                     prefix,
                                     batch,
                                     step,
                                     cellprofiler=False)

        #make the jobs
        create_batch_jobs.create_batch_jobs_4(image_prefix,
                                              batch,
                                              metadata,
                                              plate_and_well_list,
                                              app_name,
                                              tileperside=tileperside,
                                              final_tile_size=final_tile_size)

        #Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            len(plate_and_well_list))

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
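
# ----------------------------------------------------------------------------
# A minimal sketch (not part of the original source) of the S3 event record
# shape every handler above parses via event["Records"][0]["s3"], and of how a
# handler could be invoked by hand. The bucket and key are made-up placeholders
# (here shaped for the illum-triggered handlers; adjust the key to match the
# trigger of the handler being tested). The helper calls (S3, SQS, DCP) will
# still be attempted for real through the module-level clients.
if __name__ == "__main__":
    fake_event = {
        "Records": [
            {
                "s3": {
                    "bucket": {"name": "example-bucket"},  # placeholder
                    "object": {
                        # placeholder key shaped like <image_prefix><batch>/illum/<plate>/<file>
                        "key": "projectX/batch1/illum/Plate1/Plate1_Cycle1_IllumDNA.npy"
                    },
                }
            }
        ]
    }
    lambda_handler(fake_event, None)  # the context argument is unused by these handlers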