def lambda_handler(event, context):
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    batch = key.split('/')[-2]
    image_prefix = key.split('workspace')[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    print(batch, image_prefix, prefix)

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)
    plate_and_well_list = metadata['barcoding_plate_and_well_list']

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    #now let's do our stuff!
    app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

    #make the jobs
    #create_batch_jobs.create_batch_jobs_6A(image_prefix,batch,pipeline_name_list,plate_and_well_list, app_name)

    #Start a cluster
    run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                        len(plate_and_well_list) * len(pipeline_name_list))

    #Run the monitor
    #run_DCP.run_monitor(bucket_name, prefix, batch,step)
    print('Go run the monitor now')
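# --- Not part of the original handlers: a minimal sketch of the S3 ObjectCreated
# event shape that every handler in this collection indexes into via
# event['Records'][0]['s3']. The bucket and key values below are placeholders and
# the context argument is unused, so a handler can be exercised locally like this.
fake_event = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "example-bucket"},  # placeholder bucket
                "object": {"key": "projectX/workspace/metadata/batch1/metadata.json"},  # placeholder key
            }
        }
    ]
}
# lambda_handler(fake_event, None)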
Example #2
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    if 'csv' in key:
        plate = key.split('/')[-2].split('-')[0]
        batch = key.split('/')[-5]
        image_prefix = key.split(batch)[0]

    else:
        batch = key.split('/')[-2]
        image_prefix = key.split('workspace')[0]

    prefix = os.path.join(image_prefix,'workspace/')
    print(batch, prefix)

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix,'metadata',batch,'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    out_range = range(0,num_series,range_skip)
    expected_files_per_well = (num_series*int(metadata['painting_channels']))+6
    platelist = image_dict.keys()
    plate_and_well_list = metadata['painting_plate_and_well_list']

    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = platedict.keys()
        bucket_folder = '/home/ubuntu/bucket/'+image_prefix+batch+'/images_corrected/painting'
        per_plate_csv = create_CSVs.create_CSV_pipeline3(eachplate, num_series, bucket_folder, well_list, range_skip)
        csv_on_bucket_name = prefix + 'load_data_csv/'+batch+'/'+eachplate+'/load_data_pipeline3A.csv'
        print('Created', csv_on_bucket_name)
        with open(per_plate_csv, 'rb') as a:
            s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

    # Now let's do our stuff!
    app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)
    print('app_name is',app_name)

    # Make the jobs
    create_batch_jobs.create_batch_jobs_3A(image_prefix, batch, pipeline_name, platelist, well_list, app_name)

    # Start a cluster
    run_DCP.run_cluster(bucket_name, prefix, batch, config_step, fleet_file_name, len(platelist)*len(well_list))

    # Create the monitor
    run_DCP.run_monitor(bucket_name, prefix, batch, step)
    print('Go run the monitor now')
    return('Cluster started')
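# --- Illustration only (the key layout below is an assumption, not taken from the
# source): how the CSV-triggered branch above recovers plate, batch, and
# image_prefix from the uploaded object's key.
example_key = "projectX/batch1/images_corrected/painting/Plate1-A01/Image.csv"
plate = example_key.split("/")[-2].split("-")[0]  # "Plate1"
batch = example_key.split("/")[-5]                # "batch1"
image_prefix = example_key.split(batch)[0]        # "projectX/"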
def lambda_handler(event, context):
    # Manual trigger
    batch = '20210124_6W_CP228'
    image_prefix = '2018_11_20_Periscope_X/'
    prefix = '2018_11_20_Periscope_X/workspace/'
    bucket_name = 'imaging-platform'

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    plate_and_well_list = metadata["barcoding_plate_and_well_list"]
    image_dict = metadata["wells_with_all_cycles"]
    expected_cycles = metadata["barcoding_cycles"]
    platelist = list(image_dict.keys())
    num_sites = int(num_tiles)

    # Pull the file names we care about, and make the CSV
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict["1"].keys())
        bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                         "/images_corrected_cropped")
        per_plate_csv = create_CSVs.create_CSV_pipeline9(
            eachplate, num_sites, expected_cycles, bucket_folder, well_list)
        csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                              eachplate + "/load_data_pipeline9.csv")
        print("Created", csv_on_bucket_name)
        with open(per_plate_csv, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

    # now let's do our stuff!
    app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

    # make the jobs
    create_batch_jobs.create_batch_jobs_9(
        image_prefix,
        batch,
        pipeline_name,
        plate_and_well_list,
        list(range(1, num_sites + 1)),
        app_name,
    )

    # Start a cluster
    run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                        num_sites)

    # Run the monitor
    run_DCP.run_monitor(bucket_name, prefix, batch, step)
    print("Go run the monitor now")
    return "Cluster started"
Example #4
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    batch = key.split("/")[-2]
    image_prefix = key.split("workspace")[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # Load metadata file
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)
    plate_and_well_list = metadata["barcoding_plate_and_well_list"]
    image_dict = metadata["wells_with_all_cycles"]
    expected_cycles = metadata["barcoding_cycles"]
    platelist = list(image_dict.keys())
    num_series = int(metadata["barcoding_rows"]) * int(
        metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_files_per_well = (num_series * (
        (int(metadata["barcoding_cycles"]) * 4) + 1)) + 3
    num_sites = round(len(plate_and_well_list) * num_series / skip)

    # Setup DCP
    app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

    # Make the jobs
    create_batch_jobs.create_batch_jobs_7A(
        image_prefix,
        batch,
        pipeline_name,
        plate_and_well_list,
        list(range(num_series)),
        app_name,
        skip,
    )

    # Start a cluster
    run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                        num_sites)

    # Run the monitor
    run_DCP.run_monitor(bucket_name, prefix, batch, step)
    print("Go run the monitor now")
Example #5
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    batch = key.split('/')[-2]
    image_prefix = key.split('workspace')[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    print(batch, image_prefix, prefix)

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    plate_and_well_list = metadata['barcoding_plate_and_well_list']
    image_dict = metadata['wells_with_all_cycles']
    expected_cycles = metadata['barcoding_cycles']
    platelist = image_dict.keys()
    num_series = int(metadata['barcoding_rows']) * int(
        metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_files_per_well = (num_series * (
        (int(metadata['barcoding_cycles']) * 4) + 1)) + 3
    num_sites = round(len(plate_and_well_list) * num_series / 15)

    #now let's do our stuff!
    app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

    #make the jobs
    create_batch_jobs.create_batch_jobs_7A(image_prefix, batch, pipeline_name,
                                           plate_and_well_list,
                                           range(num_series), app_name)

    #Start a cluster
    run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                        num_sites)

    #Run the monitor
    run_DCP.run_monitor(bucket_name, prefix, batch, step)
    print('Go run the monitor now')
def lambda_handler(event, context):
    # Log the received event
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    prefix, batchAndPipe = key.split('pipelines/')
    image_prefix = prefix.split('workspace')[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    #Get the list of images in this experiment
    image_list_prefix = image_prefix + batch + '/images/'
    image_list = helpful_functions.paginate_a_folder(s3, bucket,
                                                     image_list_prefix)
    image_dict = helpful_functions.parse_image_names(image_list,
                                                     filter_in='20X',
                                                     filter_out='copy')
    metadata['painting_file_data'] = image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    if metadata['one_or_many_files'] == 'one':
        full_well_files = 1
    else:
        full_well_files = num_series

    #Pull the file names we care about, and make the CSV
    platelist = image_dict.keys()
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        paint_cycle_name = list(platedict[well_list[0]].keys())[0]
        per_well_im_list = []
        for eachwell in well_list:
            per_well = platedict[eachwell][paint_cycle_name]
            per_well.sort()
            if len(per_well) == full_well_files:
                per_well_im_list.append(per_well)
        bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images/' + eachplate + '/' + paint_cycle_name
        per_plate_csv = create_CSVs.create_CSV_pipeline1(
            eachplate, num_series, bucket_folder, per_well_im_list,
            metadata['one_or_many_files'])
        csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline1.csv'
        with open(per_plate_csv, 'rb') as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    #Now it's time to run DCP
    #Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)
    #run_DCP.grab_batch_config(bucket,prefix,batch,step)

    #Make a batch
    create_batch_jobs.create_batch_jobs_1(image_prefix, batch, pipeline_name,
                                          platelist, app_name)

    #Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist))

    #Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print('Go run the monitor now')
Example #7
def lambda_handler(event, context):
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    if "csv" in key:
        plate = key.split("/")[-2].split("-")[0]
        batch = key.split("/")[-5]
        image_prefix = key.split(batch)[0]

    else:
        batch = key.split("/")[-2]
        image_prefix = key.split("workspace")[0]

    prefix = os.path.join(image_prefix, "workspace/")
    print(batch, prefix)

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["painting_file_data"]
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    out_range = list(range(0, num_series, range_skip))
    expected_files_per_well = (num_series *
                               int(metadata["painting_channels"])) + 6
    platelist = list(image_dict.keys())
    plate_and_well_list = []
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        for eachwell in well_list:
            plate_and_well_list.append((eachplate, eachwell))
    metadata["painting_plate_and_well_list"] = plate_and_well_list
    helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/images_corrected/painting"
    # Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = (
        (len(plate_and_well_list) - 1) * expected_files_per_well) + 1

    print("Checking if all files are present")
    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        print("Checking CSVs for thresholds")
        image_csv_list = helpful_functions.paginate_a_folder(
            s3,
            bucket_name,
            os.path.join(image_prefix, batch, "images_corrected/painting"),
        )
        image_csv_list = [x for x in image_csv_list if "Image.csv" in x]
        image_df = helpful_functions.concat_some_csvs(s3, bucket_name,
                                                      image_csv_list,
                                                      "Image.csv")
        threshes = image_df["Threshold_FinalThreshold_Cells"]
        calc_upper_percentile = numpy.percentile(threshes, upper_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {upper_percentile} percentile was",
            calc_upper_percentile,
        )
        calc_lower_percentile = numpy.percentile(threshes, lower_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {lower_percentile} percentile was",
            calc_lower_percentile,
        )

        pipeline_on_bucket_name = os.path.join(prefix, "pipelines", batch,
                                               pipeline_name)
        local_pipeline_name = os.path.join("/tmp", pipeline_name)
        local_temp_pipeline_name = os.path.join(
            "/tmp",
            pipeline_name.split(".")[0] + "_edited.cppipe")
        with open(local_pipeline_name, "wb") as f:
            s3.download_fileobj(bucket_name, pipeline_on_bucket_name, f)
        edit_id_secondary(local_pipeline_name, local_temp_pipeline_name,
                          calc_lower_percentile, calc_upper_percentile)
        with open(local_temp_pipeline_name, "rb") as pipeline:
            s3.put_object(Body=pipeline,
                          Bucket=bucket_name,
                          Key=pipeline_on_bucket_name)
        print("Edited pipeline file")

        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict.keys())
            bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                             "/images_corrected/painting")
            per_plate_csv = create_CSVs.create_CSV_pipeline3(
                eachplate, num_series, bucket_folder, well_list, range_skip)
            csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                                  eachplate + "/load_data_pipeline3.csv")
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_3(image_prefix, batch,
                                              pipeline_name,
                                              plate_and_well_list, out_range,
                                              app_name)

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(plate_and_well_list) * len(out_range),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
def lambda_handler(event, context):
    # Log the received event
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    prefix, batchAndPipe = key.split('pipelines/')
    image_prefix = prefix.split('workspace')[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix,'metadata',batch,'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata['barcoding_rows']) * int(metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata['barcoding_cycles'])

    #Get the list of images in this experiment - this can take a long time for big experiments so let's add some prints
    print('Getting the list of images')
    image_list_prefix = image_prefix+batch+'/images/' #the slash here is critical, because we don't want to read images_corrected because it's huge
    image_list = helpful_functions.paginate_a_folder(s3,bucket,image_list_prefix)
    print('Image list retrieved')
    image_dict = helpful_functions.parse_image_names(image_list, filter_in = '10X', filter_out = 'copy')
    metadata['barcoding_file_data'] = image_dict
    print('Parsing the image list')
    #We've saved the previous for looking at/debugging later, but really all we want is the ones with all cycles
    if metadata['one_or_many_files'] == 1:
        parsed_image_dict = helpful_functions.return_full_wells(image_dict,expected_cycles, metadata['one_or_many_files'])
    else:
        parsed_image_dict = helpful_functions.return_full_wells(image_dict,expected_cycles, metadata['one_or_many_files'], files_per_well=num_series)
    metadata['wells_with_all_cycles'] = parsed_image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata, metadata_file_name, metadata_on_bucket_name)

    #Pull the file names we care about, and make the CSV
    print('Making the CSVs')
    platelist = image_dict.keys()
    for eachplate in platelist:
        platedict = parsed_image_dict[eachplate]
        well_list = platedict.keys()
        bucket_folder = '/home/ubuntu/bucket/'+image_prefix+batch+'/images/'+eachplate
        per_plate_csv = create_CSVs.create_CSV_pipeline5(eachplate, num_series, expected_cycles, bucket_folder, platedict, metadata['one_or_many_files'], metadata["fast_or_slow_mode"])
        csv_on_bucket_name = prefix + 'load_data_csv/'+batch+'/'+eachplate+'/load_data_pipeline5.csv'
        with open(per_plate_csv, 'rb') as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    #Now it's time to run DCP
    #Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket,prefix,batch,step)
    #run_DCP.grab_batch_config(bucket,prefix,batch,step)

    #Make a batch
    create_batch_jobs.create_batch_jobs_5(image_prefix,batch,pipeline_name,platelist, expected_cycles, app_name)

    #Start a cluster
    run_DCP.run_cluster(bucket,prefix,batch,step, fleet_file_name, len(platelist)*expected_cycles)

    #Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch,step)
    print('Go run the monitor now')
Example #9
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2].split("-")[0]
    batch = key.split("/")[-5]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    print(plate, batch, image_prefix, prefix)

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["barcoding_file_data"]
    num_series = int(metadata["barcoding_rows"]) * int(
        metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    # number of sites * 4 barcoding channels * number of cycles; doesn't include the 1 DAPI/site
    expected_files_per_well = int(num_series) * 4 * int(
        metadata["barcoding_cycles"])
    plate_and_well_list = metadata["barcoding_plate_and_well_list"]

    if "barcoding_xoffset_tiles" in list(metadata.keys()):
        barcoding_xoffset_tiles = metadata["barcoding_xoffset_tiles"]
        barcoding_yoffset_tiles = metadata["barcoding_yoffset_tiles"]
    else:
        barcoding_xoffset_tiles = barcoding_yoffset_tiles = "0"

    if "compress" in list(metadata.keys()):
        compress = metadata["compress"]
    else:
        compress = "True"

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/images_corrected/barcoding"
    # Because this step is batched per site (not well) don't need to anticipate partial loading of jobs
    expected_len = (
        int(len(plate_and_well_list)) * int(expected_files_per_well) + 5)

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name,
                                     prefix,
                                     batch,
                                     step,
                                     cellprofiler=False)

        # make the jobs
        create_batch_jobs.create_batch_jobs_8(
            bucket_name,
            image_prefix,
            batch,
            metadata,
            plate_and_well_list,
            app_name,
            tileperside=tileperside,
            final_tile_size=final_tile_size,
            xoffset_tiles=barcoding_xoffset_tiles,
            yoffset_tiles=barcoding_yoffset_tiles,
            compress=compress,
        )

        # Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            len(plate_and_well_list))

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
def lambda_handler(event, context):
    # Log the received event
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]

    prefix, batchAndPipe = key.split("pipelines/")
    image_prefix = prefix.split("workspace")[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata["barcoding_rows"]) * int(
        metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata["barcoding_cycles"])

    # Get the list of images in this experiment - this can take a long time for big experiments so let's add some prints
    print("Getting the list of images")
    image_list_prefix = (
        image_prefix + batch + "/images/"
    )  # the slash here is critical, because we don't want to read images_corrected because it's huge
    image_list = helpful_functions.paginate_a_folder(s3, bucket,
                                                     image_list_prefix)
    print("Image list retrieved")
    image_dict = helpful_functions.parse_image_names(image_list,
                                                     filter_in="10X",
                                                     filter_out="copy")
    metadata["barcoding_file_data"] = image_dict
    print("Parsing the image list")
    # We've saved the previous for looking at/debugging later, but really all we want is the ones with all cycles
    if metadata["one_or_many_files"] == 1:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict, expected_cycles, metadata["one_or_many_files"])
    else:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict,
            expected_cycles,
            metadata["one_or_many_files"],
            files_per_well=num_series,
        )
    metadata["wells_with_all_cycles"] = parsed_image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    # Pull the file names we care about, and make the CSV
    print("Making the CSVs")
    platelist = list(image_dict.keys())
    for eachplate in platelist:
        platedict = parsed_image_dict[eachplate]
        well_list = list(platedict.keys())
        bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                         "/images/" + eachplate)
        per_plate_csv = create_CSVs.create_CSV_pipeline5(
            eachplate,
            num_series,
            expected_cycles,
            bucket_folder,
            platedict,
            metadata["one_or_many_files"],
            metadata["fast_or_slow_mode"],
        )
        csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                              eachplate + "/load_data_pipeline5.csv")
        with open(per_plate_csv, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    # Now it's time to run DCP
    # Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)
    # run_DCP.grab_batch_config(bucket,prefix,batch,step)

    # Make a batch
    create_batch_jobs.create_batch_jobs_5(image_prefix, batch, pipeline_name,
                                          platelist, expected_cycles, app_name)

    # Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist) * expected_cycles)

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print("Go run the monitor now")
Example #11
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2]
    batch = key.split("/")[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["painting_file_data"]
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Pull the file names we care about, and make the CSV
    platelist = list(image_dict.keys())
    plate = key.split("/")[-2]
    platedict = image_dict[plate]
    well_list = list(platedict.keys())
    paint_cycle_name = list(platedict[well_list[0]].keys())[0]
    per_well_im_list = []
    if metadata["one_or_many_files"] == "one":
        full_well_files = 1
    else:
        full_well_files = num_series
    full_well_list = []
    for eachwell in well_list:
        per_well = platedict[eachwell][paint_cycle_name]
        if len(per_well) == full_well_files:  # only keep full wells
            per_well_im_list.append(per_well)
            full_well_list.append(eachwell)
            print("Added well", eachwell)
        else:
            print(f"Discarded well {eachwell}. Missing images.")
    bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                     "/images/" + plate + "/" + paint_cycle_name)
    illum_folder = "/home/ubuntu/bucket/" + image_prefix + batch + "/illum/" + plate
    per_plate_csv = create_CSVs.create_CSV_pipeline2(
        plate,
        num_series,
        bucket_folder,
        illum_folder,
        per_well_im_list,
        full_well_list,
        metadata["one_or_many_files"],
    )
    csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" + plate +
                          "/load_data_pipeline2.csv")
    print(csv_on_bucket_name)
    with open(per_plate_csv, "rb") as a:
        s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

    # Now let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/illum"
    expected_len = (int(metadata["painting_channels"]) + 1) * len(platelist)

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
        filter_out="Cycle",
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"

    else:
        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_2(image_prefix, batch,
                                              pipeline_name, platelist,
                                              well_list, app_name)

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(platelist) * len(well_list),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2]
    batch = key.split("/")[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch, "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name
    )

    image_dict = metadata["wells_with_all_cycles"]
    num_series = int(metadata["barcoding_rows"]) * int(metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata["barcoding_cycles"])
    platelist = list(image_dict.keys())

    # Default pipeline is slow. If images acquired in fast mode, pulls alternate pipeline.
    pipe_name = pipeline_name
    if metadata["fast_or_slow_mode"] == "fast":
        if "fast" not in pipe_name:
            pipe_name = pipe_name[:-7] + "_fast.cppipe"
    print(f"Pipeline name is {pipe_name}")

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/illum"
    expected_len = int(metadata["barcoding_cycles"]) * len(platelist) * 5

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
        filter_in="Cycle",
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        # First thing first, let's make an easier-to-use plate and well list and save it
        plate_and_well_list = []
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict["1"].keys())
            for eachwell in well_list:
                plate_and_well_list.append((eachplate, eachwell))
        metadata["barcoding_plate_and_well_list"] = plate_and_well_list
        helpful_functions.write_metadata_file(
            s3, bucket_name, metadata, metadata_file_name, metadata_on_bucket_name
        )
        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            bucket_folder = (
                "/home/ubuntu/bucket/" + image_prefix + batch + "/images/" + eachplate
            )
            illum_folder = (
                "/home/ubuntu/bucket/" + image_prefix + batch + "/illum/" + eachplate
            )
            per_plate_csv = create_CSVs.create_CSV_pipeline6(
                eachplate,
                num_series,
                expected_cycles,
                bucket_folder,
                illum_folder,
                platedict,
                metadata["one_or_many_files"],
                metadata["fast_or_slow_mode"],
            )
            csv_on_bucket_name = (
                prefix
                + "load_data_csv/"
                + batch
                + "/"
                + eachplate
                + "/load_data_pipeline6.csv"
            )
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_6(
            image_prefix,
            batch,
            pipe_name,
            plate_and_well_list,
            app_name,
            metadata["one_or_many_files"],
            num_series,
        )

        # Start a cluster
        if metadata["one_or_many_files"] == "one":
            njobs = len(plate_and_well_list) * 19
        else:
            njobs = len(plate_and_well_list) * num_series
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name, njobs)

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
Example #13
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2].split('-')[0]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    print(plate, batch, image_prefix, prefix)

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    expected_files_per_well = np.ceil(float(num_series) / range_skip)
    platelist = image_dict.keys()
    plate_and_well_list = []
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = platedict.keys()
        for eachwell in well_list:
            plate_and_well_list.append((eachplate, eachwell))
    metadata['painting_plate_and_well_list'] = plate_and_well_list
    helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/images_corrected/painting'
    #Because this step is batched per site (not well), we don't need to anticipate partial loading of jobs
    expected_len = (len(plate_and_well_list) *
                    expected_files_per_well) + (6 * (len(platelist)))

    done = helpful_functions.check_if_run_done(s3, bucket_name, filter_prefix,
                                               expected_len, current_app_name,
                                               prev_step_app_name, sqs,
                                               duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name,
                                     prefix,
                                     batch,
                                     step,
                                     cellprofiler=False)

        #make the jobs
        create_batch_jobs.create_batch_jobs_4(image_prefix,
                                              batch,
                                              metadata,
                                              plate_and_well_list,
                                              app_name,
                                              tileperside=tileperside,
                                              final_tile_size=final_tile_size)

        #Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            len(plate_and_well_list))

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
def lambda_handler(event, context):
    # Set up for Manual Trigger
    bucket_name = 'pooled-cell-painting'
    image_prefix = 'projects/2018_11_20_Periscope_X/'
    batch = 'nameofthebatch'
    prefix = 'projects/2018_11_20_Periscope_X/workspace/'

    print(batch, image_prefix, prefix)

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch, "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name
    )

    image_dict = metadata["barcoding_file_data"]
    num_series = int(metadata["barcoding_rows"]) * int(metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    # number of sites * 4 barcoding channels * number of cycles; doesn't include the 1 DAPI/site
    expected_files_per_well = int(num_series) * 4 * int(metadata["barcoding_cycles"])
    plate_and_well_list = metadata["barcoding_plate_and_well_list"]

    if "barcoding_xoffset_tiles" in list(metadata.keys()):
        barcoding_xoffset_tiles = metadata["barcoding_xoffset_tiles"]
        barcoding_yoffset_tiles = metadata["barcoding_yoffset_tiles"]
    else:
        barcoding_xoffset_tiles = barcoding_yoffset_tiles = "0"

    if "compress" in list(metadata.keys()):
        compress = metadata["compress"]
    else:
        compress = "True"

    # Removed Check if Run Done
    # now let's do our stuff!
    app_name = run_DCP.run_setup(
        bucket_name, prefix, batch, step, cellprofiler=False
    )

    # make the jobs
    create_batch_jobs.create_batch_jobs_8Z(
        bucket_name,
        image_prefix,
        batch,
        metadata,
        plate_and_well_list,
        app_name,
        tileperside=tileperside,
        final_tile_size=final_tile_size,
        xoffset_tiles=barcoding_xoffset_tiles,
        yoffset_tiles=barcoding_yoffset_tiles,
        compress=compress,
    )

    # Start a cluster
    run_DCP.run_cluster(
        bucket_name, prefix, batch, step, fleet_file_name, len(plate_and_well_list)
    )

    # Run the monitor
    run_DCP.run_monitor(bucket_name, prefix, batch, step)
    print("Go run the monitor now")
    return "Cluster started"
Example #15
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2]
    batch = key.split("/")[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["painting_file_data"]
    # Calculate number of images from rows and columns in metadata
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    # Overwrite rows x columns number series if images per well set in metadata
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Standard vs. SABER configs
    if "Channeldict" not in list(metadata.keys()):
        print("Update your metadata.json to include Channeldict")
        return "Update your metadata.json to include Channeldict"
    Channeldict = ast.literal_eval(metadata["Channeldict"])
    if len(Channeldict.keys()) == 1:
        SABER = False
        print("Not a SABER experiment")
    if len(Channeldict.keys()) > 1:
        SABER = True
        print("SABER experiment")

    platelist = list(image_dict.keys())
    platedict = image_dict[plate]
    well_list = list(platedict.keys())

    # Now let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/illum"
    expected_len = (int(metadata["painting_channels"]) + 1) * len(platelist)

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
        filter_out="Cycle",
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"

    else:
        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        if not SABER:
            pipeline_name = "2_CP_Apply_Illum.cppipe"
        if SABER:
            pipeline_name = "2_SABER_CP_Apply_Illum.cppipe"
        # make the jobs
        create_batch_jobs.create_batch_jobs_2(image_prefix, batch,
                                              pipeline_name, platelist,
                                              well_list, app_name)

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(platelist) * len(well_list),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    #Pull the file names we care about, and make the CSV
    platelist = image_dict.keys()

    plate = key.split('/')[-2]
    platedict = image_dict[plate]
    well_list = list(platedict.keys())
    paint_cycle_name = list(platedict[well_list[0]].keys())[0]
    per_well_im_list = []
    if metadata['one_or_many_files'] == 'one':
        full_well_files = 1
    else:
        full_well_files = num_series
    full_well_list = []
    for eachwell in well_list:
        per_well = platedict[eachwell][paint_cycle_name]
        if len(per_well) == full_well_files:  #only keep full wells
            per_well_im_list.append(per_well)
            full_well_list.append(eachwell)
            print('Added well', eachwell)
    bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images/' + plate + '/' + paint_cycle_name
    illum_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/illum/' + plate
    per_plate_csv = create_CSVs.create_CSV_pipeline2(
        plate, num_series, bucket_folder, illum_folder, per_well_im_list,
        full_well_list, metadata['one_or_many_files'])
    csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + plate + '/load_data_pipeline2.csv'
    print(csv_on_bucket_name)
    with open(per_plate_csv, 'rb') as a:
        s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

    #Now let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/illum'
    expected_len = (int(metadata['painting_channels']) + 1) * len(platelist)

    done = helpful_functions.check_if_run_done(s3,
                                               bucket_name,
                                               filter_prefix,
                                               expected_len,
                                               current_app_name,
                                               prev_step_app_name,
                                               sqs,
                                               duplicate_queue_name,
                                               filter_out='Cycle')

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')

    else:
        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        #make the jobs
        create_batch_jobs.create_batch_jobs_2(image_prefix, batch,
                                              pipeline_name, platelist,
                                              well_list, app_name)

        #Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            len(platelist) * len(well_list))

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
def lambda_handler(event, context):
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    if 'csv' in key:
        plate = key.split('/')[-2].split('-')[0]
        batch = key.split('/')[-5]
        image_prefix = key.split(batch)[0]

    else:
        batch = key.split('/')[-2]
        image_prefix = key.split('workspace')[0]

    prefix = os.path.join(image_prefix, 'workspace/')
    print(batch, prefix)

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    out_range = range(0, num_series, range_skip)
    expected_files_per_well = (num_series * 6)
    platelist = image_dict.keys()
    plate_and_well_list = metadata['painting_plate_and_well_list']

    # First let's check if 3A is done
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/images_segmentation/segment_troubleshoot'
    expected_len = (len(plate_and_well_list) * expected_files_per_well)

    print('Checking if all files are present')
    done = helpful_functions.check_if_run_done(s3, bucket_name, filter_prefix,
                                               expected_len, current_app_name,
                                               prev_step_app_name, sqs,
                                               duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        print("Checking CSVs for what the upper threshold should be")
        image_csv_list = helpful_functions.paginate_a_folder(
            s3, bucket_name,
            os.path.join(image_prefix, batch,
                         'images_segmentation/troubleshoot'))
        image_csv_list = [x for x in image_csv_list if 'Image.csv' in x]
        image_df = helpful_functions.concat_some_csvs(s3, bucket_name,
                                                      image_csv_list,
                                                      'Image.csv')
        threshes = image_df['Threshold_FinalThreshold_Cells']
        percentile = numpy.percentile(threshes, 90)
        print("In ",
              len(image_csv_list) * num_series,
              "images, the 90th percentile was", percentile)

        pipeline_on_bucket_name = os.path.join(prefix, 'pipelines', batch,
                                               pipeline_name)
        local_pipeline_name = os.path.join('/tmp', pipeline_name)
        local_temp_pipeline_name = os.path.join(
            '/tmp',
            pipeline_name.split('.')[0] + '_edited.cppipe')
        with open(local_pipeline_name, 'wb') as f:
            s3.download_fileobj(bucket_name, pipeline_on_bucket_name, f)
        edit_id_secondary(local_pipeline_name, local_temp_pipeline_name,
                          percentile)
        with open(local_temp_pipeline_name, 'rb') as pipeline:
            s3.put_object(Body=pipeline,
                          Bucket=bucket_name,
                          Key=pipeline_on_bucket_name)
        print('Edited pipeline file')

        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict.keys()
            bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images_corrected/painting'
            per_plate_csv = create_CSVs.create_CSV_pipeline3(
                eachplate, num_series, bucket_folder, well_list, range_skip)
            csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline3B.csv'
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # Now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)
        print('app_name is', app_name)

        # Make the jobs
        create_batch_jobs.create_batch_jobs_3B(image_prefix, batch,
                                               pipeline_name,
                                               plate_and_well_list, out_range,
                                               app_name)

        # Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, config_step,
                            fleet_file_name,
                            len(plate_and_well_list) * len(out_range))

        # Create the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
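# The edit_id_secondary helper called above is not defined in this file. A
# minimal sketch, assuming the .cppipe stores the secondary-object threshold
# bounds as a plain text line of the form
# "    Lower and upper bounds on threshold:0.0,1.0" (the setting name and file
# layout are assumptions, not taken from this code), could look like this:
def edit_id_secondary_sketch(local_pipeline_name, local_temp_pipeline_name,
                             percentile):
    # Read the downloaded pipeline, swap the upper threshold bound for the
    # computed percentile, and write the edited copy for re-upload.
    # Note: this naive version would touch every module carrying that setting.
    with open(local_pipeline_name, 'r') as infile:
        lines = infile.readlines()
    with open(local_temp_pipeline_name, 'w') as outfile:
        for line in lines:
            if 'Lower and upper bounds on threshold' in line:
                setting, bounds = line.rstrip('\n').split(':')
                lower = bounds.split(',')[0]
                line = f"{setting}:{lower},{round(percentile, 4)}\n"
            outfile.write(line)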
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    if ".cppipe" not in key:
        plate = key.split("/")[-2].split("_")[0]
        batch = key.split("/")[-5]
        image_prefix = key.split(batch)[0]
        print(plate)
    else:
        batch = key.split("/")[-2]
        image_prefix = key.split("workspace")[0]
    prefix = os.path.join(image_prefix, "workspace/")

    print(
        f"Batch is {batch}\n Image prefix is {image_prefix}\n Prefix is {prefix}"
    )

    # Check that the barcodes.csv is present
    barcodepath = os.path.join(prefix, "metadata", batch)
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=barcodepath)
    filelist = [obj["Key"] for obj in response.get("Contents", [])]
    if not any(".csv" in eachfile for eachfile in filelist):
        print(f"No Barcodes.csv in {barcodepath}")
        return "Barcodes.csv is missing"

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    plate_and_well_list = metadata["barcoding_plate_and_well_list"]
    image_dict = metadata["wells_with_all_cycles"]
    expected_cycles = metadata["barcoding_cycles"]
    platelist = list(image_dict.keys())
    num_series = int(metadata["barcoding_rows"]) * int(
        metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_files_per_well = (num_series * (
        (int(metadata["barcoding_cycles"]) * 4) + 1)) + 3
    num_sites = len(plate_and_well_list) * num_series

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/images_aligned/barcoding"
    # Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = (
        (len(plate_and_well_list) - 1) * expected_files_per_well) + 1

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict["1"].keys())
            bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                             "/images_aligned/barcoding")
            per_plate_csv = create_CSVs.create_CSV_pipeline7(
                eachplate, num_series, expected_cycles, bucket_folder,
                well_list)
            csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                                  eachplate + "/load_data_pipeline7.csv")
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_7(
            image_prefix,
            batch,
            pipeline_name,
            plate_and_well_list,
            list(range(num_series)),
            app_name,
        )

        # Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            num_sites)

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
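# All of the handlers in this file are wired to S3 object-created events. A
# synthetic event like the one below can exercise lambda_handler locally; the
# bucket, project, batch, and pipeline names are placeholders, and a real
# invocation still needs AWS credentials plus the metadata and pipeline files
# already present on the bucket.
if __name__ == "__main__":
    fake_event = {
        "Records": [{
            "s3": {
                "bucket": {"name": "example-imaging-bucket"},
                "object": {
                    "key": "projectX/workspace/pipelines/Batch1/"
                           "7_BC_Preprocess.cppipe"
                },
            }
        }]
    }
    lambda_handler(fake_event, None)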
Example #19
def lambda_handler(event, context):
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    prefix, batchAndPipe = key.split("pipelines/")
    image_prefix = prefix.split("workspace")[0]
    batch = batchAndPipe.split("1_")[0][:-1]

    # Get the metadata file
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    # Standard vs. SABER configs
    if "Channeldict" not in list(metadata.keys()):
        print("Update your metadata.json to include Channeldict")
        return "Update your metadata.json to include Channeldict"
    Channeldict = ast.literal_eval(metadata["Channeldict"])
    if len(Channeldict.keys()) == 1:
        SABER = False
        print("Not a SABER experiment")
    if len(Channeldict.keys()) > 1:
        SABER = True
        print("SABER experiment")

    # Calculate number of images from rows and columns in metadata
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    # Overwrite rows x columns number series if images per well set in metadata
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Get the list of images in this experiment
    if not SABER:
        parse_name_filter = "20X_CP_"
    if SABER:
        parse_name_filter = ""
    image_list_prefix = image_prefix + batch + "/images/"
    image_list = helpful_functions.paginate_a_folder(s3, bucket,
                                                     image_list_prefix)
    image_dict = helpful_functions.parse_image_names(
        image_list, filter_in=parse_name_filter, filter_out="copy")
    metadata["painting_file_data"] = image_dict
    helpful_functions.write_metadata_file(s3, bucket, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    # How many files/well indicates the well has all images present
    if metadata["one_or_many_files"] == "one":
        full_well_files = 1
    else:
        full_well_files = num_series

    # Pull the file names we care about, and make the CSV
    platelist = list(image_dict.keys())
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        Channelrounds = list(Channeldict.keys())
        # Only keep full wells
        print(
            f"{full_well_files} expect files per well and round for {eachplate}"
        )
        incomplete_wells = []
        for eachwell in well_list:
            for eachround in Channelrounds:
                per_well = platedict[eachwell][eachround]
                if len(per_well) != full_well_files:
                    incomplete_wells.append(eachwell)
                    print(
                        f"{eachwell} {eachround} doesn't have full well files. {len(per_well)} files found."
                    )
        if incomplete_wells:
            for well in incomplete_wells:
                del platedict[well]
        bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                         "/images/" + eachplate + "/")
        illum_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                        "/illum/" + eachplate)
        per_plate_csv, per_plate_csv_2 = create_CSVs.create_CSV_pipeline1(
            eachplate,
            num_series,
            bucket_folder,
            illum_folder,
            platedict,
            metadata["one_or_many_files"],
            metadata["Channeldict"],
        )
        csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                              eachplate + "/load_data_pipeline1.csv")
        csv_on_bucket_name_2 = (prefix + "load_data_csv/" + batch + "/" +
                                eachplate + "/load_data_pipeline2.csv")
        with open(per_plate_csv, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)
        with open(per_plate_csv_2, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name_2)

    # Now it's time to run DCP
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)

    # Make a batch
    if not SABER:
        pipeline_name = "1_CP_Illum.cppipe"
    if SABER:
        pipeline_name = "1_SABER_CP_Illum.cppipe"
    create_batch_jobs.create_batch_jobs_1(image_prefix, batch, pipeline_name,
                                          platelist, app_name)

    # Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist))

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print("Go run the monitor now")
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['wells_with_all_cycles']
    num_series = int(metadata['barcoding_rows']) * int(
        metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata['barcoding_cycles'])
    platelist = image_dict.keys()

    pipe_name = pipeline_name
    if metadata["fast_or_slow_mode"] == 'fast':
        if 'fast' not in pipe_name:
            pipe_name = pipe_name[:-7] + '_fast.cppipe'
    else:
        if 'slow' not in pipe_name:
            pipe_name = pipe_name[:-7] + '_slow.cppipe'
    print(pipe_name)

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/illum'
    expected_len = int(metadata['barcoding_cycles']) * len(platelist) * 5

    done = helpful_functions.check_if_run_done(s3,
                                               bucket_name,
                                               filter_prefix,
                                               expected_len,
                                               current_app_name,
                                               prev_step_app_name,
                                               sqs,
                                               duplicate_queue_name,
                                               filter_in='Cycle')

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        #First thing first, let's make an easier-to-use plate and well list and save it
        plate_and_well_list = []
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict['1'].keys()
            for eachwell in well_list:
                plate_and_well_list.append((eachplate, eachwell))
        metadata['barcoding_plate_and_well_list'] = plate_and_well_list
        helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                              metadata_file_name,
                                              metadata_on_bucket_name)
        #Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images/' + eachplate
            illum_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/illum/' + eachplate
            per_plate_csv = create_CSVs.create_CSV_pipeline6(
                eachplate, num_series, expected_cycles, bucket_folder,
                illum_folder, platedict, metadata['one_or_many_files'],
                metadata["fast_or_slow_mode"])
            csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline6.csv'
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        #make the jobs
        create_batch_jobs.create_batch_jobs_6(image_prefix, batch, pipe_name,
                                              plate_and_well_list, app_name,
                                              metadata['one_or_many_files'],
                                              num_series)

        #Start a cluster
        if metadata['one_or_many_files'] == 'one':
            njobs = len(plate_and_well_list) * 19
        else:
            njobs = len(plate_and_well_list) * num_series
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            njobs)

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
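# check_if_run_done is an external helper as well. Conceptually it counts how
# many objects have already landed under filter_prefix and compares that count
# with expected_len; the real helper also handles the SQS duplicate queue and
# the app names, which are omitted here. A stripped-down sketch under that
# assumption:
def check_if_run_done_sketch(s3_client, bucket, filter_prefix, expected_len,
                             filter_in=None):
    paginator = s3_client.get_paginator('list_objects_v2')
    count = 0
    for page in paginator.paginate(Bucket=bucket, Prefix=filter_prefix):
        for obj in page.get('Contents', []):
            if filter_in is None or filter_in in obj['Key']:
                count += 1
    return count >= expected_len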
Example #21
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    if '.cppipe' not in key:
        plate = key.split('/')[-2].split('_')[0]
        batch = key.split('/')[-5]
        image_prefix = key.split(batch)[0]
        print(plate)
    else:
        batch = key.split('/')[-2]
        image_prefix = key.split('workspace')[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    print(batch, image_prefix, prefix)

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    plate_and_well_list = metadata['barcoding_plate_and_well_list']
    image_dict = metadata['wells_with_all_cycles']
    expected_cycles = metadata['barcoding_cycles']
    platelist = image_dict.keys()
    num_series = int(metadata['barcoding_rows']) * int(
        metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_files_per_well = (num_series * (
        (int(metadata['barcoding_cycles']) * 4) + 1)) + 3
    num_sites = len(plate_and_well_list) * num_series

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/images_aligned/barcoding'
    #Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = (
        (len(plate_and_well_list) - 1) * expected_files_per_well) + 1

    done = helpful_functions.check_if_run_done(s3, bucket_name, filter_prefix,
                                               expected_len, current_app_name,
                                               prev_step_app_name, sqs,
                                               duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        #Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict['1'].keys()
            bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images_aligned/barcoding'
            per_plate_csv = create_CSVs.create_CSV_pipeline7(
                eachplate, num_series, expected_cycles, bucket_folder,
                well_list)
            csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline7.csv'
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        #make the jobs
        create_batch_jobs.create_batch_jobs_7(image_prefix, batch,
                                              pipeline_name,
                                              plate_and_well_list,
                                              range(num_series), app_name)

        #Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            num_sites)

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
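# Every handler above also relies on module-level configuration defined outside
# the snippets shown here (the shared s3 client, metadata_file_name,
# pipeline_name, step numbers, fleet and queue names, and so on). A hedged
# sketch of that preamble, using placeholder values rather than any project's
# real settings:
import boto3

s3 = boto3.client('s3')
metadata_file_name = '/tmp/metadata.json'       # local scratch copy of metadata.json
pipeline_name = '7_BC_Preprocess.cppipe'        # placeholder pipeline file name
fleet_file_name = 'exampleFleet.json'           # placeholder spot-fleet config
step = '7'                                      # placeholder step identifier
prev_step_num = '6'                             # placeholder previous step
current_app_name = 'ExampleProject_7'           # placeholder DCP app names
prev_step_app_name = 'ExampleProject_6'
duplicate_queue_name = 'ExampleProject_DeadMessages'  # placeholder SQS queue
range_skip = 5                                  # placeholder site subsampling interval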