Example #1
0
import os
import sys
import traceback
from subprocess import call

# write_log and call_to_taskrunner are helpers shared across the pipeline
# scripts; a minimal sketch of both follows this example.

def main():
    if len(sys.argv) != 2:
        print("Please provide one argument: manifest file!")
        sys.exit()

    manifest_file = sys.argv[1]

    # Check if the manifest file exists
    if not os.path.isfile(manifest_file):
        print("File {} does not exist. Exiting...".format(manifest_file))
        sys.exit()

    # Read manifest file
    options = {}
    with open(manifest_file, "r") as fp:
        for line in fp:
            dict = line.rstrip().split("=")
            options[dict[0]] = dict[1]
    fp.close()

    if 'flowcell_directory' not in options:
        print(
            'flowcell_directory is not specified in the manifest file. Exiting...'
        )
        sys.exit()

    if 'output_folder' not in options:
        print(
            'output_folder is not specified in the manifest file. Exiting...')
        sys.exit()

    if 'metadata_file' not in options:
        print(
            'metadata_file is not specified in the manifest file. Exiting...')
        sys.exit()

    if 'flowcell_barcode' not in options:
        print(
            'flowcell_barcode is not specified in the manifest file. Exiting...'
        )
        sys.exit()

    flowcell_directory = options['flowcell_directory']
    output_folder = options['output_folder']
    metadata_file = options['metadata_file']
    flowcell_barcode = options['flowcell_barcode']

    dropseq_folder = options[
        'dropseq_folder'] if 'dropseq_folder' in options else '/broad/macosko/bin/dropseq-tools'
    picard_folder = options[
        'picard_folder'] if 'picard_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/picard'
    STAR_folder = options[
        'STAR_folder'] if 'STAR_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/STAR-2.5.2a'
    scripts_folder = options[
        'scripts_folder'] if 'scripts_folder' in options else '/broad/macosko/jilong/slideseq_pipeline/scripts'
    email_address = options[
        'email_address'] if 'email_address' in options else ''

    if not os.path.isdir(flowcell_directory):
        print(
            "Folder {} does not exist. Exiting...".format(flowcell_directory))
        sys.exit()

    if not os.path.isfile(metadata_file):
        print("File {} does not exist. Exiting...".format(metadata_file))
        sys.exit()

    if not os.path.isdir(dropseq_folder):
        print("Folder {} does not exist. Exiting...".format(dropseq_folder))
        sys.exit()

    if not os.path.isdir(picard_folder):
        print("Folder {} does not exist. Exiting...".format(picard_folder))
        sys.exit()

    if not os.path.isdir(STAR_folder):
        print("Folder {} does not exist. Exiting...".format(STAR_folder))
        sys.exit()

    if not os.path.isdir(scripts_folder):
        print("Folder {} does not exist. Exiting...".format(scripts_folder))
        sys.exit()

    library_folder = options[
        'library_folder'] if 'library_folder' in options else '{}/libraries'.format(
            output_folder)

    runinfo_file = '{}/RunInfo.xml'.format(flowcell_directory)
    if not os.path.isfile(runinfo_file):
        print("File {} does not exist. Exiting...".format(runinfo_file))
        sys.exit()

    try:
        # Create directories
        if not os.path.isdir(output_folder):
            call(['mkdir', '-p', output_folder])
        if not os.path.isdir('{}/logs'.format(output_folder)):
            call(['mkdir', '-p', '{}/logs'.format(output_folder)])
        call(['mkdir', '-p', '{}/status'.format(output_folder)])
        if not os.path.isdir(library_folder):
            call(['mkdir', '-p', library_folder])
        if 'temp_folder' not in options:
            call(['mkdir', '-p', '{}/tmp'.format(output_folder)])
    except Exception as exp:
        print("EXCEPTION:!")
        print(exp)
        traceback.print_tb(exp.__traceback__, file=sys.stdout)
        print("Folder {} cannot be created. Exiting...".format(output_folder))
        sys.exit()

    log_file = '{}/logs/workflow.log'.format(output_folder)
    write_log(log_file, flowcell_barcode,
              "The Slide-seq alignment pipeline is starting to run. ")

    # Call run_preparation
    output_file = '{}/logs/run_preparation.log'.format(output_folder)
    submission_script = '{}/run_preparation.sh'.format(scripts_folder)
    call_args = [
        'qsub', '-o', output_file, '-l', 'h_vmem=20g', '-notify', '-l',
        'h_rt=10:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7',
        submission_script, manifest_file, scripts_folder, output_folder
    ]
    call_to_taskrunner(output_folder, call_args)

    if len(email_address) > 1:
        subject = "Submission received for " + flowcell_barcode
        content = "Thank you for your interest on the Slide-seq tools! We received your request. An email will be sent to you once the workflow finishes. "
        call_args = [
            'python', '{}/send_email.py'.format(scripts_folder), email_address,
            subject, content
        ]
        call(call_args)
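The examples above and below call write_log and call_to_taskrunner without defining them; they belong to the pipeline's shared scripts. A minimal sketch of what such helpers could look like, assuming write_log appends timestamped lines to the workflow log and call_to_taskrunner hands a qsub command to a task-runner queue (both behaviors are assumptions, not the pipeline's actual implementation):

from datetime import datetime
from subprocess import call


def write_log(log_file, flowcell_barcode, log_string):
    # Append a timestamped line to the shared workflow log (format is assumed).
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(log_file, "a") as fout:
        fout.write("{} [{}]: {}\n".format(now, flowcell_barcode, log_string))


def call_to_taskrunner(output_folder, call_args):
    # Hypothetical stand-in: the real helper queues call_args for a task runner
    # that throttles qsub submissions; here the command is simply run directly.
    call(call_args)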
Example #2
0
import os
import sys
import traceback
from datetime import datetime
from subprocess import call

# str2bool, get_read_structure, get_tiles, write_log and call_to_taskrunner are
# pipeline helpers defined elsewhere; see the sketches after Examples #1 and #2.

def main():
    if len(sys.argv) != 3:
        print("Please provide two arguments: manifest file and lane ID!")
        sys.exit()

    manifest_file = sys.argv[1]
    lane = sys.argv[2]

    # Check if the manifest file exists
    if not os.path.isfile(manifest_file):
        print("File {} does not exist. Exiting...".format(manifest_file))
        sys.exit()

    # Read manifest file
    options = {}
    with open(manifest_file, "r") as fp:
        for line in fp:
            dict = line.rstrip().split("=")
            options[dict[0]] = dict[1]
    fp.close()

    flowcell_directory = options['flowcell_directory']
    output_folder = options['output_folder']
    metadata_file = options['metadata_file']
    flowcell_barcode = options['flowcell_barcode']

    library_folder = options[
        'library_folder'] if 'library_folder' in options else '{}/libraries'.format(
            output_folder)
    tmpdir = options[
        'temp_folder'] if 'temp_folder' in options else '{}/tmp'.format(
            output_folder)
    dropseq_folder = options[
        'dropseq_folder'] if 'dropseq_folder' in options else '/broad/macosko/bin/dropseq-tools'
    picard_folder = options[
        'picard_folder'] if 'picard_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/picard'
    STAR_folder = options[
        'STAR_folder'] if 'STAR_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/STAR-2.5.2a'
    scripts_folder = options[
        'scripts_folder'] if 'scripts_folder' in options else '/broad/macosko/jilong/slideseq_pipeline/scripts'
    is_NovaSeq = str2bool(
        options['is_NovaSeq']) if 'is_NovaSeq' in options else False
    is_NovaSeq_S4 = str2bool(
        options['is_NovaSeq_S4']) if 'is_NovaSeq_S4' in options else False
    num_slice_NovaSeq = int(
        options['num_slice_NovaSeq']) if 'num_slice_NovaSeq' in options else 10
    num_slice_NovaSeq_S4 = int(options['num_slice_NovaSeq_S4']
                               ) if 'num_slice_NovaSeq_S4' in options else 40
    email_address = options[
        'email_address'] if 'email_address' in options else ''

    basecalls_dir = '{}/Data/Intensities/BaseCalls'.format(flowcell_directory)
    log_file = '{}/logs/workflow.log'.format(output_folder)

    # Get read structure from RunInfo.xml
    runinfo_file = '{}/RunInfo.xml'.format(flowcell_directory)
    read_structure = get_read_structure(runinfo_file)

    # Get tile information from RunInfo.xml
    slice_id = {}
    slice_first_tile = {}
    slice_tile_limit = {}
    tile_nums = get_tiles(runinfo_file, lane)
    tile_cou = len(tile_nums)
    if ((not is_NovaSeq) and (not is_NovaSeq_S4)):
        slice_id[lane] = ['0']
        slice_first_tile[lane] = [str(tile_nums[0])]
        slice_tile_limit[lane] = [str(tile_cou)]
    else:
        slice_cou = num_slice_NovaSeq if is_NovaSeq else num_slice_NovaSeq_S4
        tile_cou_per_slice = (tile_cou // slice_cou) + 1
        slice_id[lane] = []
        slice_first_tile[lane] = []
        slice_tile_limit[lane] = []
        for i in range(slice_cou):
            if (tile_cou_per_slice * i >= tile_cou):
                break
            slice_id[lane].append(str(i))
            slice_first_tile[lane].append(
                str(tile_nums[tile_cou_per_slice * i]))
            slice_tile_limit[lane].append(str(tile_cou_per_slice))

    folder_running = '{}/status/running.processbarcodes_lane_{}'.format(
        output_folder, lane)
    folder_finished = '{}/status/finished.processbarcodes_lane_{}'.format(
        output_folder, lane)
    folder_failed = '{}/status/failed.processbarcodes_lane_{}'.format(
        output_folder, lane)

    try:
        call(['mkdir', '-p', folder_running])

        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)

        # Extract Illumina barcodes
        commandStr = 'java -Djava.io.tmpdir=' + tmpdir + ' -XX:+UseParallelOldGC -XX:ParallelGCThreads=1 -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xmx4000m '
        commandStr += '-jar ' + picard_folder + '/picard.jar ExtractIlluminaBarcodes TMP_DIR=' + tmpdir + ' VALIDATION_STRINGENCY=SILENT '
        commandStr += 'BASECALLS_DIR=' + basecalls_dir + ' OUTPUT_DIR=' + output_folder + '/' + lane + '/barcodes LANE=' + lane + ' '
        commandStr += 'READ_STRUCTURE=' + read_structure + ' BARCODE_FILE=' + output_folder + '/' + lane + '/barcode_params.txt '
        commandStr += 'METRICS_FILE=' + output_folder + '/' + lane + '/' + flowcell_barcode + '.' + lane + '.barcode_metrics COMPRESS_OUTPUTS=true NUM_PROCESSORS=4'
        write_log(
            log_file, flowcell_barcode, "ExtractIlluminaBarcodes for Lane " +
            lane + " Command=" + commandStr)
        os.system(commandStr)
        write_log(log_file, flowcell_barcode,
                  "ExtractIlluminaBarcodes for Lane " + lane + " is done. ")

        # Convert Illumina base calls to sam (unmapped.bam)
        for i in range(len(slice_id[lane])):
            commandStr = 'java -Djava.io.tmpdir=' + tmpdir + ' -XX:+UseParallelOldGC -XX:ParallelGCThreads=1 -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xmx10192m '
            commandStr += '-jar ' + picard_folder + '/picard.jar IlluminaBasecallsToSam TMP_DIR=' + tmpdir + ' VALIDATION_STRINGENCY=SILENT '
            commandStr += 'BASECALLS_DIR=' + basecalls_dir + ' LANE=' + lane + ' RUN_BARCODE=' + flowcell_barcode + ' NUM_PROCESSORS=4 '
            commandStr += 'READ_STRUCTURE=' + read_structure + ' LIBRARY_PARAMS=' + output_folder + '/' + lane + '/' + slice_id[
                lane][i] + '/library_params.txt INCLUDE_NON_PF_READS=false '
            commandStr += 'APPLY_EAMSS_FILTER=false MAX_READS_IN_RAM_PER_TILE=600000 ADAPTERS_TO_CHECK=null IGNORE_UNEXPECTED_BARCODES=true'
            commandStr += ' SEQUENCING_CENTER=BI BARCODES_DIR=' + output_folder + '/' + lane + '/barcodes FIRST_TILE=' + slice_first_tile[
                lane][i] + ' TILE_LIMIT=' + slice_tile_limit[lane][i]

            output_file = '{}/logs/run_barcodes2sam_lane_{}_{}.log'.format(
                output_folder, lane, slice_id[lane][i])
            submission_script = '{}/run_barcodes2sam.sh'.format(scripts_folder)
            call_args = [
                'qsub', '-o', output_file, '-l', 'h_vmem=100G', '-notify',
                '-l', 'h_rt=50:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                'os=RedHat7', submission_script, manifest_file, commandStr,
                lane, slice_id[lane][i], scripts_folder, output_folder,
                '{}/{}'.format(output_folder, lane)
            ]
            call_to_taskrunner(output_folder, call_args)

        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)

        call(['mv', folder_running, folder_finished])
    except Exception as exp:
        print("EXCEPTION:!")
        print(exp)
        traceback.print_tb(exp.__traceback__, file=sys.stdout)
        if os.path.isdir(folder_running):
            call(['mv', folder_running, folder_failed])
        else:
            call(['mkdir', '-p', folder_failed])

        if len(email_address) > 1:
            subject = "Slide-seq workflow failed for " + flowcell_barcode
            content = "The Slide-seq workflow for lane " + lane + " failed at the step of processing barcodes. Please check the log file for the issues. "
            call_args = [
                'python', '{}/send_email.py'.format(scripts_folder),
                email_address, subject, content
            ]
            call(call_args)

        sys.exit()
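Example #2 additionally relies on str2bool, get_read_structure and get_tiles. A minimal sketch of what such helpers could look like, assuming boolean manifest values are spelled out as text and that RunInfo.xml follows the usual Illumina layout with <Read NumCycles="..." IsIndexedRead="..."/> entries and <Tile>lane_tile</Tile> entries; the pipeline's actual helpers may differ:

import xml.etree.ElementTree as ET


def str2bool(s):
    # Interpret manifest/metadata values such as 'True', 'true', 'Yes' or '1' (assumed convention).
    return str(s).strip().lower() in ('true', 't', 'yes', 'y', '1')


def get_read_structure(runinfo_file):
    # Build a Picard-style read structure such as '42T8B41T' from the
    # <Read> entries in RunInfo.xml (assumed mapping: indexed reads -> B, template reads -> T).
    root = ET.parse(runinfo_file).getroot()
    parts = []
    for read in root.iter('Read'):
        kind = 'B' if read.get('IsIndexedRead') == 'Y' else 'T'
        parts.append(read.get('NumCycles') + kind)
    return ''.join(parts)


def get_tiles(runinfo_file, lane):
    # Collect the tile numbers for one lane, assuming RunInfo.xml lists tiles as
    # <Tile>lane_tile</Tile> (e.g. <Tile>1_1101</Tile>); returns sorted strings.
    root = ET.parse(runinfo_file).getroot()
    tiles = [t.text.strip().split('_')[1]
             for t in root.iter('Tile')
             if t.text.strip().split('_')[0] == str(lane)]
    tiles.sort()
    return tiles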
Example #3
0
import csv
import os
import sys
import traceback
from datetime import datetime
from subprocess import call

# str2bool, write_log and call_to_taskrunner are pipeline helpers defined elsewhere.

def main():
    if len(sys.argv) != 4:
        print(
            "Please provide three arguments: manifest file, library ID and locus function!"
        )
        sys.exit()

    manifest_file = sys.argv[1]
    library = sys.argv[2]
    locus_function_list = sys.argv[3]

    # Check if the manifest file exists
    if not os.path.isfile(manifest_file):
        print("File {} does not exist. Exiting...".format(manifest_file))
        sys.exit()

    # Read manifest file
    options = {}
    with open(manifest_file, "r") as fp:
        for line in fp:
            dict = line.rstrip().split("=")
            options[dict[0]] = dict[1]
    fp.close()

    flowcell_directory = options['flowcell_directory']
    output_folder = options['output_folder']
    metadata_file = options['metadata_file']
    flowcell_barcode = options['flowcell_barcode']

    library_folder = options[
        'library_folder'] if 'library_folder' in options else '{}/libraries'.format(
            output_folder)
    tmpdir = options[
        'temp_folder'] if 'temp_folder' in options else '{}/tmp'.format(
            output_folder)
    dropseq_folder = options[
        'dropseq_folder'] if 'dropseq_folder' in options else '/broad/macosko/bin/dropseq-tools'
    picard_folder = options[
        'picard_folder'] if 'picard_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/picard'
    STAR_folder = options[
        'STAR_folder'] if 'STAR_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/STAR-2.5.2a'
    scripts_folder = options[
        'scripts_folder'] if 'scripts_folder' in options else '/broad/macosko/jilong/slideseq_pipeline/scripts'
    is_NovaSeq = str2bool(
        options['is_NovaSeq']) if 'is_NovaSeq' in options else False
    is_NovaSeq_S4 = str2bool(
        options['is_NovaSeq_S4']) if 'is_NovaSeq_S4' in options else False
    num_slice_NovaSeq = int(
        options['num_slice_NovaSeq']) if 'num_slice_NovaSeq' in options else 10
    num_slice_NovaSeq_S4 = int(options['num_slice_NovaSeq_S4']
                               ) if 'num_slice_NovaSeq_S4' in options else 40

    runinfo_file = '{}/RunInfo.xml'.format(flowcell_directory)
    log_file = '{}/logs/workflow.log'.format(output_folder)

    # Read info from metadata file
    lanes = []
    lanes_unique = []
    libraries = []
    libraries_unique = []
    barcodes = []
    bead_structures = []
    reference = ''
    run_barcodematching = False
    puckcaller_path = ''
    bead_type = ''
    sequence = 'AAGCAGTGGTATCAACGCAGAGTGAATGGG'
    base_quality = '10'
    min_transcripts_per_cell = '10'
    email_address = ''
    experiment_date = ''
    with open('{}/parsed_metadata.txt'.format(output_folder), 'r') as fin:
        reader = csv.reader(fin, delimiter='\t')
        rows = list(reader)
        row0 = rows[0]
        for i in range(1, len(rows)):
            row = rows[i]
            lanes.append(row[row0.index('lane')])
            if row[row0.index('lane')] not in lanes_unique:
                lanes_unique.append(row[row0.index('lane')])
            libraries.append(row[row0.index('library')])
            if row[row0.index('library')] not in libraries_unique:
                libraries_unique.append(row[row0.index('library')])
            barcodes.append(row[row0.index('sample_barcode')])
            bead_structures.append(row[row0.index('bead_structure')])
            if row[row0.index('library')] == library:
                reference = row[row0.index('reference')]
                sequence = row[row0.index('start_sequence')]
                base_quality = row[row0.index('base_quality')]
                min_transcripts_per_cell = row[row0.index(
                    'min_transcripts_per_cell')]
                email_address = row[row0.index('email')]
                run_barcodematching = str2bool(
                    row[row0.index('run_barcodematching')])
                puckcaller_path = row[row0.index('puckcaller_path')]
                bead_type = row[row0.index('bead_type')]
                experiment_date = row[row0.index('date')]

    reference_folder = reference[:reference.rfind('/')]
    referencePure = reference[reference.rfind('/') + 1:]
    if (referencePure.endswith('.gz')):
        referencePure = referencePure[:referencePure.rfind('.')]
    referencePure = referencePure[:referencePure.rfind('.')]
    genome_dir = '{}/STAR'.format(reference_folder)
    intervals = '{}/{}.genes.intervals'.format(reference_folder, referencePure)
    annotations_file = '{}/{}.gtf'.format(reference_folder, referencePure)
    ref_flat = '{}/{}.refFlat'.format(reference_folder, referencePure)
    ribosomal_intervals = '{}/{}.rRNA.intervals'.format(
        reference_folder, referencePure)
    reference2 = referencePure + '.' + locus_function_list

    folder_running = '{}/status/running.write_bijective_mapping_{}_{}'.format(
        output_folder, library, locus_function_list)
    folder_finished = '{}/status/finished.write_bijective_mapping_{}_{}'.format(
        output_folder, library, locus_function_list)
    folder_failed = '{}/status/failed.write_bijective_mapping_{}_{}'.format(
        output_folder, library, locus_function_list)

    alignment_folder = '{}/{}_{}/{}/alignment'.format(library_folder,
                                                      experiment_date, library,
                                                      reference2)
    barcode_matching_folder = '{}/{}_{}/{}/barcode_matching'.format(
        library_folder, experiment_date, library, reference2)
    dge_gzfile = '{}/{}.digital_expression.txt.gz'.format(
        alignment_folder, library)
    dge_file = '{}/{}.digital_expression2.txt'.format(alignment_folder,
                                                      library)
    uniqueMappedDge_file = '{}/{}.UniqueMappedDge.txt'.format(
        alignment_folder, library)
    MappedDGEForR_file = '{}/MappedDGEForR.csv'.format(alignment_folder)

    call(['mkdir', '-p', folder_running])

    try:
        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)

        # UniqueMappedIlluminaBarcodes
        bci_file = '{}/{}_barcode_matching.txt'.format(barcode_matching_folder,
                                                       library)
        unique_bci_file = '{}/{}_unique_matched_illumina_barcodes.txt'.format(
            barcode_matching_folder, library)

        if not os.path.isfile(dge_file):
            os.system('gunzip -c {} > {}'.format(dge_gzfile, dge_file))

        location_file = '{}/{}_matched_bead_locations.txt'.format(
            barcode_matching_folder, library)
        genename_file = '{}/{}_genenames.txt'.format(barcode_matching_folder,
                                                     library)
        bcb_file = '{}/{}_unique_matched_beads.txt'.format(
            barcode_matching_folder, library)
        commandStr = 'perl ' + scripts_folder + '/get_unique_mapped_dge.pl ' + dge_file + ' ' + uniqueMappedDge_file + ' ' + genename_file + ' ' + bcb_file
        os.system(commandStr)

        # Call run_WriteBijectiveMapping
        # 'UniqueMappedBeads','UniqueMappedDGE','UniqueMappedIlluminaBarcodes','GeneNames'
        output_file = '{}/logs/run_WriteBijectiveMapping_{}_{}.log'.format(
            output_folder, library, locus_function_list)
        submission_script = '{}/run_WriteBijectiveMapping.sh'.format(
            scripts_folder)
        call_args = [
            'qsub', '-o', output_file, '-l', 'h_vmem=65G', '-notify', '-l',
            'h_rt=10:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7',
            submission_script,
            '/broad/software/nonfree/Linux/redhat_7_x86_64/pkgs/matlab_2019a',
            scripts_folder, bcb_file, uniqueMappedDge_file, unique_bci_file,
            genename_file, location_file, puckcaller_path, output_folder
        ]
        call_to_taskrunner(output_folder, call_args)

        commandStr = 'perl ' + scripts_folder + '/txt2csv.pl ' + dge_file + ' ' + MappedDGEForR_file
        os.system(commandStr)

        if os.path.isfile(dge_file):
            call(['rm', dge_file])

        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)

        call(['mv', folder_running, folder_finished])
    except Exception as exp:
        print("EXCEPTION:!")
        print(exp)
        traceback.print_tb(exp.__traceback__, file=sys.stdout)
        if os.path.isdir(folder_running):
            call(['mv', folder_running, folder_failed])
        else:
            call(['mkdir', '-p', folder_failed])

        if len(email_address) > 1:
            subject = "Slide-seq workflow failed for " + flowcell_barcode
            content = "The Slide-seq workflow for " + library + " " + locus_function_list + " failed at the step of generating BijectiveMapping.mat. Please check the log file for the issues. "
            call_args = [
                'python', '{}/send_email.py'.format(scripts_folder),
                email_address, subject, content
            ]
            call(call_args)

        sys.exit()
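Examples #3 to #5 read {output_folder}/parsed_metadata.txt with csv.reader and look each column up by name through row0.index(...). The same read can be expressed with csv.DictReader; a minimal sketch assuming only that the file is tab-delimited with the header names used above (lane, library, sample_barcode, bead_structure, reference, email, ...):

import csv


def read_parsed_metadata(path):
    # Return the metadata rows as dicts keyed by header name,
    # e.g. row['lane'], row['library'], row['sample_barcode'].
    with open(path, 'r') as fin:
        return list(csv.DictReader(fin, delimiter='\t'))


# Usage: pick out the rows belonging to the current library.
# rows = read_parsed_metadata('{}/parsed_metadata.txt'.format(output_folder))
# this_library = [row for row in rows if row['library'] == library]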
Example #4
0
import csv
import os
import random
import sys
import time
import traceback
from datetime import datetime
from subprocess import call

# str2bool, write_log and call_to_taskrunner are pipeline helpers defined elsewhere.

def main():
    if len(sys.argv) != 4:
        print(
            "Please provide three arguments: manifest file, library ID and locus function list!"
        )
        sys.exit()

    manifest_file = sys.argv[1]
    library = sys.argv[2]
    locus_function_list = sys.argv[3]

    # Check if the manifest file exists
    if not os.path.isfile(manifest_file):
        print("File {} does not exist. Exiting...".format(manifest_file))
        sys.exit()

    # Read manifest file
    options = {}
    with open(manifest_file, "r") as fp:
        for line in fp:
            dict = line.rstrip().split("=")
            options[dict[0]] = dict[1]
    fp.close()

    flowcell_directory = options['flowcell_directory']
    output_folder = options['output_folder']
    metadata_file = options['metadata_file']
    flowcell_barcode = options['flowcell_barcode']

    library_folder = options[
        'library_folder'] if 'library_folder' in options else '{}/libraries'.format(
            output_folder)
    tmpdir = options[
        'temp_folder'] if 'temp_folder' in options else '{}/tmp'.format(
            output_folder)
    dropseq_folder = options[
        'dropseq_folder'] if 'dropseq_folder' in options else '/broad/macosko/bin/dropseq-tools'
    picard_folder = options[
        'picard_folder'] if 'picard_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/picard'
    STAR_folder = options[
        'STAR_folder'] if 'STAR_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/STAR-2.5.2a'
    scripts_folder = options[
        'scripts_folder'] if 'scripts_folder' in options else '/broad/macosko/jilong/slideseq_pipeline/scripts'
    is_NovaSeq = str2bool(
        options['is_NovaSeq']) if 'is_NovaSeq' in options else False
    is_NovaSeq_S4 = str2bool(
        options['is_NovaSeq_S4']) if 'is_NovaSeq_S4' in options else False
    num_slice_NovaSeq = int(
        options['num_slice_NovaSeq']) if 'num_slice_NovaSeq' in options else 10
    num_slice_NovaSeq_S4 = int(options['num_slice_NovaSeq_S4']
                               ) if 'num_slice_NovaSeq_S4' in options else 40

    basecalls_dir = '{}/Data/Intensities/BaseCalls'.format(flowcell_directory)
    runinfo_file = '{}/RunInfo.xml'.format(flowcell_directory)
    log_file = '{}/logs/workflow.log'.format(output_folder)

    # Read info from metadata file
    lanes = []
    lanes_unique = []
    libraries = []
    libraries_unique = []
    barcodes = []
    bead_structures = []
    reference = ''
    run_barcodematching = False
    puckcaller_path = ''
    bead_type = '180402'
    sequence = 'AAGCAGTGGTATCAACGCAGAGTGAATGGG'
    base_quality = '10'
    min_transcripts_per_cell = '10'
    email_address = ''
    experiment_date = ''
    gen_downsampling = False
    with open('{}/parsed_metadata.txt'.format(output_folder), 'r') as fin:
        reader = csv.reader(fin, delimiter='\t')
        rows = list(reader)
        row0 = rows[0]
        for i in range(1, len(rows)):
            row = rows[i]
            lanes.append(row[row0.index('lane')])
            if row[row0.index('lane')] not in lanes_unique:
                lanes_unique.append(row[row0.index('lane')])
            libraries.append(row[row0.index('library')])
            if row[row0.index('library')] not in libraries_unique:
                libraries_unique.append(row[row0.index('library')])
            barcodes.append(row[row0.index('sample_barcode')])
            bead_structures.append(row[row0.index('bead_structure')])
            if row[row0.index('library')] == library:
                reference = row[row0.index('reference')]
                sequence = row[row0.index('start_sequence')]
                base_quality = row[row0.index('base_quality')]
                min_transcripts_per_cell = row[row0.index(
                    'min_transcripts_per_cell')]
                email_address = row[row0.index('email')]
                run_barcodematching = str2bool(
                    row[row0.index('run_barcodematching')])
                puckcaller_path = row[row0.index('puckcaller_path')]
                bead_type = row[row0.index('bead_type')]
                experiment_date = row[row0.index('date')]
                if 'gen_downsampling' in row0:
                    gen_downsampling = str2bool(
                        row[row0.index('gen_downsampling')])

    reference_folder = reference[:reference.rfind('/')]
    referencePure = reference[reference.rfind('/') + 1:]
    if (referencePure.endswith('.gz')):
        referencePure = referencePure[:referencePure.rfind('.')]
    referencePure = referencePure[:referencePure.rfind('.')]
    genome_dir = '{}/STAR'.format(reference_folder)
    intervals = '{}/{}.genes.intervals'.format(reference_folder, referencePure)
    annotations_file = '{}/{}.gtf'.format(reference_folder, referencePure)
    ref_flat = '{}/{}.refFlat'.format(reference_folder, referencePure)
    ribosomal_intervals = '{}/{}.rRNA.intervals'.format(
        reference_folder, referencePure)
    reference2 = referencePure + '.' + locus_function_list

    folder_running = '{}/status/running.analysis_spec_{}_{}'.format(
        output_folder, library, locus_function_list)
    folder_finished = '{}/status/finished.analysis_spec_{}_{}'.format(
        output_folder, library, locus_function_list)
    folder_failed = '{}/status/failed.analysis_spec_{}_{}'.format(
        output_folder, library, locus_function_list)

    analysis_folder = '{}/{}_{}'.format(library_folder, experiment_date,
                                        library)
    alignment_folder = '{}/{}/alignment/'.format(analysis_folder, reference2)
    barcode_matching_folder = '{}/{}/barcode_matching/'.format(
        analysis_folder, reference2)
    combined_bamfile = '{}/{}.bam'.format(analysis_folder, library)

    call(['mkdir', '-p', folder_running])

    try:
        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)

        # Select cells by num transcripts
        commandStr = dropseq_folder + '/SelectCellsByNumTranscripts '
        if is_NovaSeq or is_NovaSeq_S4:
            commandStr += '-m 24076m I=' + combined_bamfile + ' MIN_TRANSCRIPTS_PER_CELL=' + min_transcripts_per_cell + ' READ_MQ=' + base_quality
        else:
            commandStr += '-m 7692m I=' + combined_bamfile + ' MIN_TRANSCRIPTS_PER_CELL=' + min_transcripts_per_cell + ' READ_MQ=' + base_quality
        commandStr += ' OUTPUT=' + alignment_folder + library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.txt.gz '
        commandStr += 'TMP_DIR=' + tmpdir + ' VALIDATION_STRINGENCY=SILENT'
        if locus_function_list == 'exonic+intronic':
            commandStr += ' LOCUS_FUNCTION_LIST=INTRONIC'
        elif locus_function_list == 'intronic':
            commandStr += ' LOCUS_FUNCTION_LIST=null LOCUS_FUNCTION_LIST=INTRONIC'
        write_log(
            log_file, flowcell_barcode, "SelectCellsByNumTranscripts for " +
            library + " Command=" + commandStr)
        os.system(commandStr)
        write_log(log_file, flowcell_barcode,
                  "SelectCellsByNumTranscripts for " + library + " is done. ")

        # Call run_cmatcher
        if run_barcodematching:
            finish_file = '{}/BeadBarcodes_degenerate.finished'.format(
                analysis_folder)
            while 1:
                if os.path.isfile(finish_file):
                    call(['rm', finish_file])
                    break
                time.sleep(30)

            bead_barcode_file = '{}/BeadBarcodes_degenerate.txt'.format(
                analysis_folder)
            select_cell_gzfile = alignment_folder + library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.txt.gz'
            select_cell_file = alignment_folder + library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.txt'
            name = library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells'
            name_shuffled = library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.shuffled'
            os.system('gunzip -c ' + select_cell_gzfile + ' > ' +
                      select_cell_file)

            select_cell_shuffled_file = alignment_folder + library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.shuffled.txt'
            with open(select_cell_shuffled_file, 'w') as fout:
                with open(select_cell_file, 'r') as fin:
                    for line in fin:
                        line = line.strip(' \t\n')
                        items = list(line)
                        random.shuffle(items)
                        bc = ''.join(items)
                        fout.write(bc + '\n')

            # Count selected cell barcodes and split them into chunks of k for CMatcher.
            total = 0
            with open(select_cell_file, 'r') as fin:
                for line in fin:
                    total += 1
            k = 10000
            ls = total // k

            for i in range(ls + 1):
                if i * k >= total:
                    break

                # real barcodes
                infile2 = '{}/{}_{}.txt'.format(alignment_folder, name,
                                                str(i + 1))
                commandStr = 'awk \'NR >= {} && NR <= {}\' {} > {}'.format(
                    str(i * k + 1), str((i + 1) * k), select_cell_file,
                    infile2)
                os.system(commandStr)

                file4 = '{}/{}_barcode_matching_distance_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                file5 = '{}/{}_barcode_matching_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                output_file = '{}/logs/run_cmatcher_{}_{}_{}.log'.format(
                    output_folder, library, locus_function_list, str(i + 1))
                submission_script = '{}/run_cmatcher.sh'.format(scripts_folder)
                call_args = [
                    'qsub', '-o', output_file, '-l', 'h_vmem=30G', '-notify',
                    '-l', 'h_rt=26:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                    'os=RedHat7', submission_script, scripts_folder,
                    bead_barcode_file, infile2, file4, file5, bead_type,
                    output_folder, barcode_matching_folder
                ]
                call_to_taskrunner(output_folder, call_args)
                write_log(
                    log_file, flowcell_barcode, "Run CMatcher for " + library +
                    " " + reference2 + " " + str(i + 1))

                # shuffled barcodes
                infile2 = '{}/{}_{}.txt'.format(alignment_folder,
                                                name_shuffled, str(i + 1))
                commandStr = 'awk \'NR >= {} && NR <= {}\' {} > {}'.format(
                    str(i * k + 1), str((i + 1) * k),
                    select_cell_shuffled_file, infile2)
                os.system(commandStr)

                file4 = '{}/{}_barcode_matching_distance_shuffled_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                file5 = '{}/{}_barcode_matching_shuffled_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                output_file = '{}/logs/run_cmatcher_{}_{}_shuffled_{}.log'.format(
                    output_folder, library, locus_function_list, str(i + 1))
                submission_script = '{}/run_cmatcher.sh'.format(scripts_folder)
                call_args = [
                    'qsub', '-o', output_file, '-l', 'h_vmem=30G', '-notify',
                    '-l', 'h_rt=26:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                    'os=RedHat7', submission_script, scripts_folder,
                    bead_barcode_file, infile2, file4, file5, bead_type,
                    output_folder, barcode_matching_folder
                ]
                call_to_taskrunner(output_folder, call_args)
                write_log(
                    log_file, flowcell_barcode, "Run CMatcher for " + library +
                    " " + reference2 + " " + str(i + 1))

            # Call run_cmatcher_combine
            output_file = '{}/logs/run_cmatcher_combine_{}_{}.log'.format(
                output_folder, library, locus_function_list)
            submission_script = '{}/run_cmatcher_combine.sh'.format(
                scripts_folder)
            call_args = [
                'qsub', '-o', output_file, '-l', 'h_vmem=10G', '-notify', '-l',
                'h_rt=48:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                'os=RedHat7', submission_script, manifest_file, library,
                scripts_folder, locus_function_list, output_folder,
                '{}/{}'.format(analysis_folder, reference2)
            ]
            call_to_taskrunner(output_folder, call_args)

        # Generate digital expression files for all Illumina barcodes
        commandStr = dropseq_folder + '/DigitalExpression '
        if is_NovaSeq or is_NovaSeq_S4:
            commandStr += '-m 32268m '
        else:
            commandStr += '-m 7692m '
        commandStr += 'I=' + combined_bamfile + ' O=' + alignment_folder + library + '.AllIllumina.digital_expression.txt.gz '
        commandStr += 'SUMMARY=' + alignment_folder + library + '.AllIllumina.digital_expression_summary.txt EDIT_DISTANCE=1 READ_MQ=' + base_quality + ' MIN_BC_READ_THRESHOLD=0 '
        commandStr += 'CELL_BC_FILE=' + alignment_folder + library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.txt.gz TMP_DIR=' + tmpdir + ' '
        commandStr += 'OUTPUT_HEADER=false UEI=' + library + ' VALIDATION_STRINGENCY=SILENT'
        if locus_function_list == 'exonic+intronic':
            commandStr += ' LOCUS_FUNCTION_LIST=INTRONIC'
        elif locus_function_list == 'intronic':
            commandStr += ' LOCUS_FUNCTION_LIST=null LOCUS_FUNCTION_LIST=INTRONIC'
        write_log(
            log_file, flowcell_barcode, "DigitalExpression for " + library +
            " for all Illumina barcodes Command=" + commandStr)
        os.system(commandStr)
        write_log(
            log_file, flowcell_barcode, "DigitalExpression for " + library +
            " for all Illumina barcodes is done. ")

        if gen_downsampling:
            # Downsample bam
            downsample_folder = '{}/{}_{}/{}/downsample/'.format(
                library_folder, experiment_date, library, reference2)
            call(['mkdir', '-p', downsample_folder])
            f1 = '{}/{}.AllIllumina.digital_expression_summary.txt'.format(
                alignment_folder, library)
            f2 = '{}/{}_1.digital_expression_summary.txt'.format(
                downsample_folder, library)
            call(['cp', f1, f2])
            ratio = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
            for i in range(0, 9, 1):
                output_file = '{}/logs/gen_downsample_dge_{}_{}_{}.log'.format(
                    output_folder, library, reference2, str(ratio[i]))
                submission_script = '{}/gen_downsample_dge.sh'.format(
                    scripts_folder)
                call_args = [
                    'qsub', '-o', output_file, '-l', 'h_vmem=47G', '-notify',
                    '-l', 'h_rt=14:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                    'os=RedHat7', submission_script, manifest_file, library,
                    scripts_folder, locus_function_list,
                    str(ratio[i]), output_folder, downsample_folder
                ]
                call_to_taskrunner(output_folder, call_args)

            # Call generate_plot_downsampling
            output_file = '{}/logs/generate_plot_downsampling_{}_{}.log'.format(
                output_folder, library, reference2)
            submission_script = '{}/generate_plot_downsampling.sh'.format(
                scripts_folder)
            call_args = [
                'qsub', '-o', output_file, '-l', 'h_vmem=10G', '-notify', '-l',
                'h_rt=40:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                'os=RedHat7', submission_script, manifest_file, library,
                scripts_folder, locus_function_list, output_folder,
                barcode_matching_folder
            ]
            call_to_taskrunner(output_folder, call_args)

        if not run_barcodematching:
            if os.path.isdir(barcode_matching_folder):
                call(['rm', '-r', barcode_matching_folder])
            if len(email_address) > 1:
                subject = "Slide-seq workflow finished for " + flowcell_barcode
                content = "The Slide-seq workflow for " + library + "_" + locus_function_list + " is finished. Please check the output folder for the results. Thank you for using the Slide-seq tools! "
                call_args = [
                    'python', '{}/send_email.py'.format(scripts_folder),
                    email_address, subject, content
                ]
                call(call_args)

                output_file = '{}/logs/give_group_{}_{}.log'.format(
                    output_folder, library, reference2)
                submission_script = '{}/give_all_group_write.sh'.format(
                    scripts_folder)
                call_args = [
                    'qsub', '-o', output_file, '-l', 'h_vmem=5G', '-notify',
                    '-l', 'h_rt=10:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                    'os=RedHat7', submission_script
                ]
                call_to_taskrunner(output_folder, call_args)

        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)

        call(['mv', folder_running, folder_finished])
    except Exception as exp:
        print("EXCEPTION:!")
        print(exp)
        traceback.print_tb(exp.__traceback__, file=sys.stdout)
        if os.path.isdir(folder_running):
            call(['mv', folder_running, folder_failed])
        else:
            call(['mkdir', '-p', folder_failed])

        if len(email_address) > 1:
            subject = "Slide-seq workflow failed for " + flowcell_barcode
            content = "The Slide-seq workflow for " + library + " " + locus_function_list + " failed at the step of running specific analysis. Please check the log file for the issues. "
            call_args = [
                'python', '{}/send_email.py'.format(scripts_folder),
                email_address, subject, content
            ]
            call(call_args)

        sys.exit()
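Every example notifies users by shelling out to `python {scripts_folder}/send_email.py <address> <subject> <content>`. That script is not part of these examples; a minimal sketch of what it could look like, assuming a local SMTP relay and a placeholder sender address (both are assumptions, not the pipeline's actual configuration):

import smtplib
import sys
from email.mime.text import MIMEText


def main():
    # Assumed CLI: send_email.py <recipient> <subject> <content>
    recipient, subject, content = sys.argv[1], sys.argv[2], sys.argv[3]
    msg = MIMEText(content)
    msg['Subject'] = subject
    msg['From'] = 'slideseq-pipeline@localhost'  # hypothetical sender
    msg['To'] = recipient
    with smtplib.SMTP('localhost') as smtp:  # assumed local mail relay
        smtp.send_message(msg)


if __name__ == '__main__':
    main()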
Example #5
0
import csv
import os
import sys
import time
import traceback
from datetime import datetime
from subprocess import call

# str2bool, get_tiles, write_log and call_to_taskrunner are pipeline helpers defined elsewhere.

def main():
    if len(sys.argv) != 3:
        print("Please provide two arguments: manifest file and library ID!")
        sys.exit()
    
    manifest_file = sys.argv[1]
    library = sys.argv[2]

    # Check if the manifest file exists
    if not os.path.isfile(manifest_file):
        print("File {} does not exist. Exiting...".format(manifest_file))
        sys.exit()

    # Read manifest file
    options = {}
    with open(manifest_file,"r") as fp:
        for line in fp:
            dict = line.rstrip().split("=")
            options[dict[0]] = dict[1]
    fp.close()
    
    flowcell_directory = options['flowcell_directory']
    output_folder = options['output_folder']
    metadata_file = options['metadata_file']
    flowcell_barcode = options['flowcell_barcode']
    
    library_folder = options['library_folder'] if 'library_folder' in options else '{}/libraries'.format(output_folder)
    tmpdir = options['temp_folder'] if 'temp_folder' in options else '{}/tmp'.format(output_folder)
    dropseq_folder = options['dropseq_folder'] if 'dropseq_folder' in options else '/broad/macosko/bin/dropseq-tools'
    picard_folder = options['picard_folder'] if 'picard_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/picard'
    STAR_folder = options['STAR_folder'] if 'STAR_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/STAR-2.5.2a'
    scripts_folder = options['scripts_folder'] if 'scripts_folder' in options else '/broad/macosko/jilong/slideseq_pipeline/scripts'
    is_NovaSeq = str2bool(options['is_NovaSeq']) if 'is_NovaSeq' in options else False
    is_NovaSeq_S4 = str2bool(options['is_NovaSeq_S4']) if 'is_NovaSeq_S4' in options else False
    num_slice_NovaSeq = int(options['num_slice_NovaSeq']) if 'num_slice_NovaSeq' in options else 10
    num_slice_NovaSeq_S4 = int(options['num_slice_NovaSeq_S4']) if 'num_slice_NovaSeq_S4' in options else 40
    
    basecalls_dir = '{}/Data/Intensities/BaseCalls'.format(flowcell_directory)
    runinfo_file = '{}/RunInfo.xml'.format(flowcell_directory)
    log_file = '{}/logs/workflow.log'.format(output_folder)
    
    # Read info from metadata file
    lanes = []
    lanes_unique = []
    libraries = []
    libraries_unique = []
    barcodes = []
    bead_structures = []
    reference = ''
    locus_function_list = 'exonic+intronic'
    run_barcodematching = False
    puckcaller_path = ''
    bead_type = '180402'
    email_address = ''
    experiment_date = ''
    gen_updistance_plot = False
    with open('{}/parsed_metadata.txt'.format(output_folder), 'r') as fin:
        reader = csv.reader(fin, delimiter='\t')
        rows = list(reader)
        row0 = rows[0]
        for i in range(1, len(rows)):
            row = rows[i]
            lanes.append(row[row0.index('lane')])
            if row[row0.index('lane')] not in lanes_unique:
                lanes_unique.append(row[row0.index('lane')])
            libraries.append(row[row0.index('library')])
            if row[row0.index('library')] not in libraries_unique:
                libraries_unique.append(row[row0.index('library')])
            barcodes.append(row[row0.index('sample_barcode')])
            bead_structures.append(row[row0.index('bead_structure')])
            if row[row0.index('library')] == library:
                reference = row[row0.index('reference')]
                locus_function_list = row[row0.index('locus_function_list')]
                email_address = row[row0.index('email')]
                run_barcodematching = str2bool(row[row0.index('run_barcodematching')])
                puckcaller_path = row[row0.index('puckcaller_path')]
                bead_type = row[row0.index('bead_type')]
                experiment_date = row[row0.index('date')]
                if 'gen_updistance_plot' in row0:
                    gen_updistance_plot = str2bool(row[row0.index('gen_updistance_plot')])
    
    # Get tile information from RunInfo.xml
    slice_id = {}
    slice_first_tile = {}
    slice_tile_limit = {}
    for lane in lanes_unique:
        tile_nums = get_tiles(runinfo_file, lane)
        tile_cou = len(tile_nums)
        if ((not is_NovaSeq) and (not is_NovaSeq_S4)):
            slice_id[lane] = ['0']
            slice_first_tile[lane] = [str(tile_nums[0])]
            slice_tile_limit[lane] = [str(tile_cou)]
        else:
            slice_cou = num_slice_NovaSeq if is_NovaSeq else num_slice_NovaSeq_S4
            tile_cou_per_slice = (tile_cou // slice_cou) + 1
            slice_id[lane] = []
            slice_first_tile[lane] = []
            slice_tile_limit[lane] = []
            for i in range(slice_cou):
                if (tile_cou_per_slice * i >= tile_cou):
                    break
                slice_id[lane].append(str(i))
                slice_first_tile[lane].append(str(tile_nums[tile_cou_per_slice * i]))
                slice_tile_limit[lane].append(str(tile_cou_per_slice))
    
    folder_waiting = '{}/status/waiting.analysis_{}'.format(output_folder, library)
    folder_running = '{}/status/running.analysis_{}'.format(output_folder, library)
    folder_finished = '{}/status/finished.analysis_{}'.format(output_folder, library)
    folder_failed = '{}/status/failed.analysis_{}'.format(output_folder, library)
    
    analysis_folder = '{}/{}_{}'.format(library_folder, experiment_date, library)

    call(['mkdir', '-p', folder_waiting])
    
    if run_barcodematching:
        file2 = '{}/BeadBarcodes.txt'.format(puckcaller_path)
        file3 = '{}/BeadLocations.txt'.format(puckcaller_path)
        while 1:
            if os.path.isfile(file2) and os.path.isfile(file3):
                break
            time.sleep(30)
        
        call(['cp', file2, analysis_folder+'/'])
        call(['cp', file3, analysis_folder+'/'])
        bead_barcode_file = '{}/BeadBarcodes.txt'.format(analysis_folder)
        bead_location_file = '{}/BeadLocations.txt'.format(analysis_folder)

        # Count bead barcodes and split them into chunks of k for CMatcher.
        total = 0
        with open(bead_barcode_file, 'r') as fin:
            for line in fin:
                total += 1
        k = 10000
        ls = total // k

        for i in range(ls + 1):
            if i * k >= total:
                break
            
            infile2 = '{}/BeadBarcodes_{}.txt'.format(analysis_folder, str(i + 1))
            commandStr = 'awk \'NR >= {} && NR <= {}\' {} > {}'.format(str(i * k + 1), str((i+1) * k), bead_barcode_file, infile2)
            os.system(commandStr)
            
            file4 = '{}/{}_barcode_matching_01_{}.txt'.format(analysis_folder, library, str(i + 1))
            file5 = '{}/{}_barcode_matching_2_{}.txt'.format(analysis_folder, library, str(i + 1))
            output_file = '{}/logs/run_cmatcher_beads_{}.log'.format(output_folder, str(i + 1))
            submission_script = '{}/run_cmatcher_beads.sh'.format(scripts_folder)
            call_args = ['qsub', '-o', output_file, '-l', 'h_vmem=5G', '-notify', '-l', 'h_rt=5:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7', submission_script, scripts_folder, infile2, bead_barcode_file, bead_location_file, file4, file5, bead_type, output_folder, analysis_folder]
            call_to_taskrunner(output_folder, call_args)
            
        # Call run_cmatcher_beads_combine
        output_file = '{}/logs/run_cmatcher_beads_combine_{}.log'.format(output_folder, library)
        submission_script = '{}/run_cmatcher_beads_combine.sh'.format(scripts_folder)
        call_args = ['qsub', '-o', output_file, '-l', 'h_vmem=5G', '-notify', '-l', 'h_rt=50:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7', submission_script, manifest_file, library, scripts_folder, output_folder, analysis_folder]
        call_to_taskrunner(output_folder, call_args)
    
    # Wait for all run_alignment jobs to finish
    failed_list = []
    while 1:
        f = True
        for i in range(len(lanes)):
            if libraries[i] != library:
                continue
            for slice in slice_id[lanes[i]]:
                fol1 = '{}/status/finished.alignment_{}_{}_{}_{}'.format(output_folder, library, lanes[i], slice, barcodes[i])
                fol2 = '{}/status/failed.alignment_{}_{}_{}_{}'.format(output_folder, library, lanes[i], slice, barcodes[i])
                if (not os.path.isdir(fol1)) and (not os.path.isdir(fol2)):
                    f = False
                prefix_libraries = '{}/{}.{}.{}.{}'.format(analysis_folder, flowcell_barcode, lanes[i], slice, library)
                if (barcodes[i]):
                    prefix_libraries += '.'+barcodes[i]
                star_bamfile = prefix_libraries + '.star_gene_exon_tagged2.bam'
                if (os.path.isdir(fol1) or os.path.isdir(fol2)) and (not os.path.isfile(star_bamfile)):
                    if star_bamfile not in failed_list:
                        failed_list.append(star_bamfile)
                        if os.path.isdir(fol1):
                            call(['rm', '-r', fol1])
                        if os.path.isdir(fol2):
                            call(['rm', '-r', fol2])
                        if os.path.isfile(prefix_libraries+'.star.Log.final.out'):
                            call(['rm', prefix_libraries+'.star.Log.final.out'])
                        if os.path.isfile(prefix_libraries+'.star.Log.out'):
                            call(['rm', prefix_libraries+'.star.Log.out'])
                        if os.path.isfile(prefix_libraries+'.star.Log.progress.out'):
                            call(['rm', prefix_libraries+'.star.Log.progress.out'])
                        if os.path.isfile(prefix_libraries+'.star.SJ.out.tab'):
                            call(['rm', prefix_libraries+'.star.SJ.out.tab'])
                        if os.path.isdir(prefix_libraries+'.star._STARtmp'):
                            call(['rm', '-r', prefix_libraries+'.star._STARtmp'])
                        output_file = '{}/logs/run_alignment_{}_{}_{}.log'.format(output_folder, library, lanes[i], slice)
                        submission_script = '{}/run_alignment.sh'.format(scripts_folder)
                        call_args = ['qsub', '-o', output_file, '-l', 'h_vmem=60G', '-notify', '-l', 'h_rt=21:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7', submission_script, manifest_file, library, lanes[i], slice, barcodes[i], scripts_folder, output_folder, analysis_folder]
                        call_to_taskrunner(output_folder, call_args)
                        f = False
                    else:
                        write_log(log_file, flowcell_barcode, 'MergeSamFiles error: '+star_bamfile+' does not exist!')
                        raise Exception(star_bamfile + ' does not exist!')
        if f:
            break
        time.sleep(60)
    
    if os.path.isdir(folder_waiting):
        call(['mv', folder_waiting, folder_running])
    else:
        call(['mkdir', '-p', folder_running])
    
    try:
        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)
    
        # Merge bam files
        combined_bamfile = '{}/{}.bam'.format(analysis_folder, library)
        commandStr = 'java -Djava.io.tmpdir='+tmpdir+' -Dsamjdk.buffer_size=131072 -XX:+UseParallelOldGC -XX:ParallelGCThreads=1 -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xmx8192m '
        commandStr += '-jar '+picard_folder+'/picard.jar MergeSamFiles TMP_DIR='+tmpdir+' CREATE_INDEX=true CREATE_MD5_FILE=false VALIDATION_STRINGENCY=SILENT '
        commandStr += 'OUTPUT='+combined_bamfile+' SORT_ORDER=coordinate ASSUME_SORTED=true'
        for i in range(len(lanes)):
            if libraries[i] != library:
                continue
            for slice in slice_id[lanes[i]]:
                star_bamfile = '{}/{}.{}.{}.{}'.format(analysis_folder, flowcell_barcode, lanes[i], slice, library)
                if (barcodes[i]):
                    star_bamfile += '.'+barcodes[i]
                star_bamfile += '.star_gene_exon_tagged2.bam'
                if not os.path.isfile(star_bamfile):
                    write_log(log_file, flowcell_barcode, 'MergeSamFiles error: '+star_bamfile+' does not exist!')
                    raise Exception(star_bamfile + ' does not exist!')
                commandStr += ' INPUT='+star_bamfile
        write_log(log_file, flowcell_barcode, "MergeSamFiles for "+library+" Command="+commandStr)
        os.system(commandStr)
        write_log(log_file, flowcell_barcode, "MergeSamFiles for "+library+" is done. ")
        
        # Validate bam file
        commandStr = 'java -Djava.io.tmpdir='+tmpdir+' -Dsamjdk.buffer_size=131072 -XX:+UseParallelOldGC -XX:ParallelGCThreads=1 -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xmx16384m '
        commandStr += '-jar '+picard_folder+'/picard.jar ValidateSamFile TMP_DIR='+tmpdir+' VALIDATION_STRINGENCY=SILENT '
        commandStr += 'INPUT='+combined_bamfile+' MODE=SUMMARY'
        if (not is_NovaSeq) and (not is_NovaSeq_S4):
            commandStr += ' IGNORE=MISSING_PLATFORM_VALUE IGNORE=INVALID_VERSION_NUMBER'
        write_log(log_file, flowcell_barcode, "ValidateSamFile for "+library+" Command="+commandStr)
        os.system(commandStr)
        write_log(log_file, flowcell_barcode, "ValidateSamFile for "+library+" is done. ")
        
        # Call generate_plots
        output_file = '{}/logs/generate_plots_{}.log'.format(output_folder, library)
        submission_script = '{}/generate_plots.sh'.format(scripts_folder)
        call_args = ['qsub', '-o', output_file, '-l', 'h_vmem=55G', '-notify', '-l', 'h_rt=10:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7', submission_script, manifest_file, library, scripts_folder, output_folder, analysis_folder]
        call_to_taskrunner(output_folder, call_args)
        
        lists = locus_function_list.split(',')
        referencePure = reference[reference.rfind('/') + 1:]
        if (referencePure.endswith('.gz')):
            referencePure = referencePure[:referencePure.rfind('.')]
        referencePure = referencePure[:referencePure.rfind('.')]   
        for l in lists:
            call(['mkdir', '-p', '{}/{}.{}'.format(analysis_folder, referencePure, l)])
            call(['mkdir', '-p', '{}/{}.{}/alignment'.format(analysis_folder, referencePure, l)])
            
            if run_barcodematching:
                barcode_matching_folder = '{}/{}.{}/barcode_matching/'.format(analysis_folder, referencePure, l)
                call(['mkdir', '-p', barcode_matching_folder])
                for i in range(len(lanes)):
                    if libraries[i] != library:
                        continue
                    for slice in slice_id[lanes[i]]:
                        toCopyFile = '{}/{}.{}.{}.{}'.format(analysis_folder, flowcell_barcode, lanes[i], slice, library)
                        if (barcodes[i]):
                            toCopyFile += '.'+barcodes[i]
                        toCopyFile += '.star_gene_exon_tagged2.bam'
                        if os.path.isfile(toCopyFile):
                            call(['cp', toCopyFile, barcode_matching_folder])
            
            # Call run_analysis_spec
            output_file = '{}/logs/run_analysis_spec_{}_{}.log'.format(output_folder, library, l)
            submission_script = '{}/run_analysis_spec.sh'.format(scripts_folder)
            call_args = ['qsub', '-o', output_file, '-l', 'h_vmem=60G', '-notify', '-l', 'h_rt=24:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7', submission_script, manifest_file, library, scripts_folder, l, output_folder, '{}/{}.{}'.format(analysis_folder, referencePure, l)]
            call_to_taskrunner(output_folder, call_args)
        
        for i in range(len(lanes)):
            if libraries[i] != library:
                continue
            for slice in slice_id[lanes[i]]:
                toDeleteFile = '{}/{}.{}.{}.{}'.format(analysis_folder, flowcell_barcode, lanes[i], slice, library)
                if (barcodes[i]):
                    toDeleteFile += '.'+barcodes[i]
                toDeleteFile += '.star_gene_exon_tagged2.bam'
                if os.path.isfile(toDeleteFile):
                    call(['rm', toDeleteFile])
        
        # Combine check_alignments_quality files
        dict_unique_score = {}
        dict_multi_score = {}
        dict_unique_mismatch = {}
        dict_multi_mismatch = {}
        dict_unique_ratio = {}
        dict_multi_ratio = {}
        for i in range(len(lanes)):
            if libraries[i] != library:
                continue
            for slice in slice_id[lanes[i]]:
                star_samfile = '{}/{}.{}.{}.{}'.format(analysis_folder, flowcell_barcode, lanes[i], slice, library)
                if (barcodes[i]):
                    star_samfile += '.'+barcodes[i]
                star_samfile += '.star.Aligned.out.sam'
                file1 = star_samfile + ".unique.score"
                file2 = star_samfile + ".multi.score"
                file3 = star_samfile + ".unique.mismatch"
                file4 = star_samfile + ".multi.mismatch"
                file5 = star_samfile + ".unique.ratio"
                file6 = star_samfile + ".multi.ratio"
                if os.path.isfile(file1):
                    with open(file1, 'r') as fin:
                        for line in fin:
                            c1 = line.split('\t')[0].strip(' \t\n')
                            c2 = int(line.split('\t')[1].strip(' \t\n'))
                            if c1 not in dict_unique_score:
                                dict_unique_score[c1] = c2
                            else:
                                dict_unique_score[c1] += c2
                    fin.close()
                if os.path.isfile(file2):
                    with open(file2, 'r') as fin:
                        for line in fin:
                            c1 = line.split('\t')[0].strip(' \t\n')
                            c2 = int(line.split('\t')[1].strip(' \t\n'))
                            if c1 not in dict_multi_score:
                                dict_multi_score[c1] = c2
                            else:
                                dict_multi_score[c1] += c2
                    fin.close()
                if os.path.isfile(file3):
                    with open(file3, 'r') as fin:
                        for line in fin:
                            c1 = line.split('\t')[0].strip(' \t\n')
                            c2 = int(line.split('\t')[1].strip(' \t\n'))
                            if c1 not in dict_unique_mismatch:
                                dict_unique_mismatch[c1] = c2
                            else:
                                dict_unique_mismatch[c1] += c2
                    fin.close()
                if os.path.isfile(file4):
                    with open(file4, 'r') as fin:
                        for line in fin:
                            c1 = line.split('\t')[0].strip(' \t\n')
                            c2 = int(line.split('\t')[1].strip(' \t\n'))
                            if c1 not in dict_multi_mismatch:
                                dict_multi_mismatch[c1] = c2
                            else:
                                dict_multi_mismatch[c1] += c2
                    fin.close()
                if os.path.isfile(file5):
                    with open(file5, 'r') as fin:
                        for line in fin:
                            c1 = line.split('\t')[0].strip(' \t\n')
                            c2 = int(line.split('\t')[1].strip(' \t\n'))
                            if c1 not in dict_unique_ratio:
                                dict_unique_ratio[c1] = c2
                            else:
                                dict_unique_ratio[c1] += c2
                    fin.close()
                if os.path.isfile(file6):
                    with open(file6, 'r') as fin:
                        for line in fin:
                            c1 = line.split('\t')[0].strip(' \t\n')
                            c2 = int(line.split('\t')[1].strip(' \t\n'))
                            if c1 not in dict_multi_ratio:
                                dict_multi_ratio[c1] = c2
                            else:
                                dict_multi_ratio[c1] += c2
                    fin.close()
                call(['rm', file1])
                call(['rm', file2])
                call(['rm', file3])
                call(['rm', file4])
                call(['rm', file5])
                call(['rm', file6])
        
        outfile1 = '{}/{}.unique.score'.format(analysis_folder, library)
        outfile2 = '{}/{}.multi.score'.format(analysis_folder, library)
        outfile3 = '{}/{}.unique.mismatch'.format(analysis_folder, library)
        outfile4 = '{}/{}.multi.mismatch'.format(analysis_folder, library)
        outfile5 = '{}/{}.unique.ratio'.format(analysis_folder, library)
        outfile6 = '{}/{}.multi.ratio'.format(analysis_folder, library)
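        # Write the merged per-library histograms back out in the same
        # "<key>\t<count>" format; these are presumably the inputs that
        # plot_alignment_histogram.py reads below.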
        with open(outfile1, 'w') as fout:
            for k in dict_unique_score:
                fout.write(k + '\t' + str(dict_unique_score[k]) + '\n')
        fout.close()
        with open(outfile2, 'w') as fout:
            for k in dict_multi_score:
                fout.write(k + '\t' + str(dict_multi_score[k]) + '\n')
        fout.close()
        with open(outfile3, 'w') as fout:
            for k in dict_unique_mismatch:
                fout.write(k + '\t' + str(dict_unique_mismatch[k]) + '\n')
        fout.close()
        with open(outfile4, 'w') as fout:
            for k in dict_multi_mismatch:
                fout.write(k + '\t' + str(dict_multi_mismatch[k]) + '\n')
        fout.close()
        with open(outfile5, 'w') as fout:
            for k in dict_unique_ratio:
                fout.write(k + '\t' + str(dict_unique_ratio[k]) + '\n')
        fout.close()
        with open(outfile6, 'w') as fout:
            for k in dict_multi_ratio:
                fout.write(k + '\t' + str(dict_multi_ratio[k]) + '\n')
        fout.close()
        
        # plot
        commandStr = 'python {}/plot_alignment_histogram.py {} {} {}'.format(scripts_folder, analysis_folder, library, library)
        os.system(commandStr)
        
        # Summary mapping rate
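        # Read totals from each slice's STAR Log.final.out (get_key/get_val are
        # assumed to split its "name | value" lines), while mismatch1-3 aggregate
        # the histogram entries keyed '1', '2' and '3' (presumably reads with that
        # many mismatches).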
        totalreads = 0
        uniquereads = 0
        multireads = 0
        toomanyreads = 0
        for i in range(len(lanes)):
            if libraries[i] != library:
                continue
            for slice in slice_id[lanes[i]]:
                # Per-slice STAR summary log; kept distinct from the workflow log_file used by write_log below
                star_log_file = '{}/{}/{}/{}/{}/{}.{}.{}.{}.{}.star.Log.final.out'.format(output_folder, lanes[i], slice, library, barcodes[i], flowcell_barcode, lanes[i], slice, library, barcodes[i])
                if not os.path.isfile(star_log_file):
                    continue
                with open(star_log_file, "r") as f3:
                    for line3 in f3:
                        if get_key(line3) == 'Number of input reads':
                            totalreads += int(get_val(line3))
                        if get_key(line3) == 'Uniquely mapped reads number':
                            uniquereads += int(get_val(line3))
                        if get_key(line3) == 'Number of reads mapped to multiple loci':
                            multireads += int(get_val(line3))
                        if get_key(line3) == 'Number of reads mapped to too many loci':
                            toomanyreads += int(get_val(line3))
                f3.close()
        mismatch1 = 0
        mismatch2 = 0
        mismatch3 = 0
        if '1' in dict_unique_mismatch:
            mismatch1 += dict_unique_mismatch['1']
        if '1' in dict_multi_mismatch:
            mismatch1 += dict_multi_mismatch['1']
        if '2' in dict_unique_mismatch:
            mismatch2 += dict_unique_mismatch['2']
        if '2' in dict_multi_mismatch:
            mismatch2 += dict_multi_mismatch['2']
        if '3' in dict_unique_mismatch:
            mismatch3 += dict_unique_mismatch['3']
        if '3' in dict_multi_mismatch:
            mismatch3 += dict_multi_mismatch['3']
        output_file = '{}/{}_mapping_rate.txt'.format(analysis_folder, library)
        fout = open(output_file, 'w')
        fout.write('library\t{}\n'.format(library))
        fout.write('total_reads\t{}\n'.format(totalreads))
        fout.write('unique_aligned_reads\t{}\n'.format(uniquereads))
        fout.write('unique_aligned_ratio\t{}\n'.format('{0:.3g}'.format(100*uniquereads/totalreads)))
        fout.write('multi_aligned_reads\t{}\n'.format(multireads))
        fout.write('multi_aligned_ratio\t{}\n'.format('{0:.3g}'.format(100*multireads/totalreads)))
        fout.write('too_many_aligned_reads\t{}\n'.format(toomanyreads))
        fout.write('too_many_aligned_ratio\t{}\n'.format('{0:.3g}'.format(100*toomanyreads/totalreads)))
        fout.write('mismatch1_rate\t{}\n'.format('{0:.3g}'.format(100*mismatch1/totalreads)))
        fout.write('mismatch2_rate\t{}\n'.format('{0:.3g}'.format(100*mismatch2/totalreads)))
        fout.write('mismatch3_rate\t{}\n'.format('{0:.3g}'.format(100*mismatch3/totalreads)))
        fout.close()
        
        if gen_updistance_plot:
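            # Only the first lane of this library is processed (note the break at
            # the end of the loop): its per-slice unmapped BAMs are merged, converted
            # to FASTQ, and a run_analysis_UPdistance job is submitted; read 2 is
            # deleted, so the script presumably works on read 1 only.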
            for i in range(len(lanes)):
                if (libraries[i] != library):
                    continue
                    
                read1_file = '{}/{}.{}.read1.fastq'.format(analysis_folder, library, lanes[i])
                read2_file = '{}/{}.{}.read2.fastq'.format(analysis_folder, library, lanes[i])
                combined_bamfile = '{}/{}.{}.unmapped.bam'.format(analysis_folder, library, lanes[i])
                combined_baifile = '{}/{}.{}.unmapped.bai'.format(analysis_folder, library, lanes[i])
                
                commandStr = 'java -Djava.io.tmpdir='+tmpdir+' -Dsamjdk.buffer_size=131072 -XX:+UseParallelOldGC -XX:ParallelGCThreads=1 -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xmx8192m '
                commandStr += '-jar '+picard_folder+'/picard.jar MergeSamFiles TMP_DIR='+tmpdir+' CREATE_INDEX=true CREATE_MD5_FILE=false VALIDATION_STRINGENCY=SILENT '
                commandStr += 'OUTPUT='+combined_bamfile+' SORT_ORDER=coordinate ASSUME_SORTED=true'
                for slice in slice_id[lanes[i]]:
                    bamfile = '{}/{}.{}.{}.{}'.format(analysis_folder, flowcell_barcode, lanes[i], slice, library)
                    if (barcodes[i]):
                        bamfile += '.'+barcodes[i]
                    bamfile += '.unmapped.bam'
                    if not os.path.isfile(bamfile):
                        write_log(log_file, flowcell_barcode, 'MergeSamFiles error: '+bamfile+' does not exist!')
                        raise Exception(bamfile + ' does not exist!')
                    commandStr += ' INPUT='+bamfile
                os.system(commandStr)

                # Convert bam to fastq
                commandStr = 'java -Djava.io.tmpdir='+tmpdir+' -Xmx500m -XX:+UseParallelOldGC -XX:ParallelGCThreads=1 -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 '
                commandStr += '-jar '+picard_folder+'/picard.jar SamToFastq I='+combined_bamfile+' F='+read1_file+' F2='+read2_file+' VALIDATION_STRINGENCY=SILENT'
                os.system(commandStr)
            
                if os.path.isfile(combined_bamfile):
                    call(['rm', combined_bamfile])
                if os.path.isfile(combined_baifile):
                    call(['rm', combined_baifile])
                if os.path.isfile(read2_file):
                    call(['rm', read2_file])

                output_file = '{}/logs/run_analysis_UPdistance_{}_{}.log'.format(output_folder, library, lanes[i])
                submission_script = '{}/run_analysis_UPdistance.sh'.format(scripts_folder)
                call_args = ['qsub', '-o', output_file, '-l', 'h_vmem=35G', '-notify', '-l', 'h_rt=10:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7', submission_script, manifest_file, library, lanes[i], scripts_folder, output_folder, analysis_folder]
                call_to_taskrunner(output_folder, call_args)
                
                break
        
        now = datetime.now()
        dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
        print(dt_string)
        
        call(['mv', folder_running, folder_finished])
    except Exception as exp:
        print("EXCEPTION:!")
        print(exp)
        traceback.print_tb(exp.__traceback__, file=sys.stdout)
        if os.path.isdir(folder_running):
            call(['mv', folder_running, folder_failed])
        elif os.path.isdir(folder_waiting):
            call(['mv', folder_waiting, folder_failed])
        else:
            call(['mkdir', '-p', folder_failed])
            
        if len(email_address) > 1:
            subject = "Slide-seq workflow failed for " + flowcell_barcode
            content = "The Slide-seq workflow for "+library+" failed at the step of running analysis. Please check the log file for the issues. "
            call_args = ['python', '{}/send_email.py'.format(scripts_folder), email_address, subject, content]
            call(call_args)
        
        sys.exit()
Example #6
    # Call run pipeline
    if args.resubmit:
        output_file = '{}/logs/run_mergebarcodes.log'.format(output_dir)
        call_args = [
            'qsub', '-o', output_file, '-l', 'h_vmem=10G', '-notify', '-l',
            'h_rt=90:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7',
            submission_script, manifest_file, scripts_folder, output_dir
        ]
    else:
        output_file = '{}/logs/run_pipeline.log'.format(output_dir)
        call_args = [
            'qsub', '-o', output_file, '-l', 'h_vmem=10G', '-notify', '-l',
            'h_rt=5:00:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7',
            submission_script, manifest_file, scripts_folder, output_dir
        ]

    print('Command issued:')
    print(' '.join(call_args))

    if not args.dryrun:
        call_to_taskrunner(output_dir, call_args)

    submitted.append(flowcell)

    print('Flowcells {} submitted for processing'.format(' '.join(submitted)))
    skipped = [flowcell for flowcell in flowcells if flowcell not in submitted]
    if skipped:
        print(
            '\nFlowcells {} were skipped -- please see warnings above.'.format(
                ' '.join(skipped)))
Example #7
def main():
    if len(sys.argv) != 4:
        print(
            "Please provide three arguments: manifest file, library ID and locus function list!"
        )
        sys.exit()

    manifest_file = sys.argv[1]
    library = sys.argv[2]
    locus_function_list = sys.argv[3]

    # Check if the manifest file exists
    if not os.path.isfile(manifest_file):
        print("File {} does not exist. Exiting...".format(manifest_file))
        sys.exit()

    # Read manifest file
    options = {}
    with open(manifest_file, "r") as fp:
        for line in fp:
            parts = line.rstrip().split("=")
            options[parts[0]] = parts[1]
    fp.close()

    flowcell_directory = options['flowcell_directory']
    output_folder = options['output_folder']
    metadata_file = options['metadata_file']
    flowcell_barcode = options['flowcell_barcode']

    library_folder = options[
        'library_folder'] if 'library_folder' in options else '{}/libraries'.format(
            output_folder)
    tmpdir = options[
        'temp_folder'] if 'temp_folder' in options else '{}/tmp'.format(
            output_folder)
    dropseq_folder = options[
        'dropseq_folder'] if 'dropseq_folder' in options else '/broad/macosko/bin/dropseq-tools'
    picard_folder = options[
        'picard_folder'] if 'picard_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/picard'
    STAR_folder = options[
        'STAR_folder'] if 'STAR_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/STAR-2.5.2a'
    scripts_folder = options[
        'scripts_folder'] if 'scripts_folder' in options else '/broad/macosko/jilong/slideseq_pipeline/scripts'
    is_NovaSeq = str2bool(
        options['is_NovaSeq']) if 'is_NovaSeq' in options else False
    is_NovaSeq_S4 = str2bool(
        options['is_NovaSeq_S4']) if 'is_NovaSeq_S4' in options else False
    num_slice_NovaSeq = int(
        options['num_slice_NovaSeq']) if 'num_slice_NovaSeq' in options else 10
    num_slice_NovaSeq_S4 = int(options['num_slice_NovaSeq_S4']
                               ) if 'num_slice_NovaSeq_S4' in options else 40

    # Read info from metadata file
    lanes = []
    lanes_unique = []
    libraries = []
    libraries_unique = []
    barcodes = []
    bead_structures = []
    reference = ''
    base_quality = '10'
    min_transcripts_per_cell = '10'
    email_address = ''
    bead_type = '180402'
    bead_structure = ''
    run_puckmatcher = False
    experiment_date = ''
    gen_read1_plot = False
    with open('{}/parsed_metadata.txt'.format(output_folder), 'r') as fin:
        reader = csv.reader(fin, delimiter='\t')
        rows = list(reader)
        row0 = rows[0]
        for i in range(1, len(rows)):
            row = rows[i]
            lanes.append(row[row0.index('lane')])
            if row[row0.index('lane')] not in lanes_unique:
                lanes_unique.append(row[row0.index('lane')])
            libraries.append(row[row0.index('library')])
            if row[row0.index('library')] not in libraries_unique:
                libraries_unique.append(row[row0.index('library')])
            barcodes.append(row[row0.index('sample_barcode')])
            bead_structures.append(row[row0.index('bead_structure')])
            if row[row0.index('library')] == library:
                reference = row[row0.index('reference')]
                base_quality = row[row0.index('base_quality')]
                min_transcripts_per_cell = row[row0.index(
                    'min_transcripts_per_cell')]
                email_address = row[row0.index('email')]
                bead_type = row[row0.index('bead_type')]
                bead_structure = row[row0.index('bead_structure')]
                run_puckmatcher = str2bool(
                    row[row0.index('run_barcodematching')])
                experiment_date = row[row0.index('date')]
                if 'gen_read1_plot' in row0:
                    gen_read1_plot = str2bool(
                        row[row0.index('gen_read1_plot')])
    fin.close()

    reference_folder = reference[:reference.rfind('/')]
    referencePure = reference[reference.rfind('/') + 1:]
    if (referencePure.endswith('.gz')):
        referencePure = referencePure[:referencePure.rfind('.')]
    referencePure = referencePure[:referencePure.rfind('.')]
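    # Derive the reference bundle paths (STAR genome dir, gene intervals, GTF,
    # refFlat and rRNA intervals) from the reference folder and base name;
    # reference2 tags output folders with the locus function list.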
    genome_dir = '{}/STAR'.format(reference_folder)
    intervals = '{}/{}.genes.intervals'.format(reference_folder, referencePure)
    annotations_file = '{}/{}.gtf'.format(reference_folder, referencePure)
    ref_flat = '{}/{}.refFlat'.format(reference_folder, referencePure)
    ribosomal_intervals = '{}/{}.rRNA.intervals'.format(
        reference_folder, referencePure)
    reference2 = referencePure + '.' + locus_function_list

    runinfo_file = '{}/RunInfo.xml'.format(flowcell_directory)
    log_file = '{}/logs/workflow.log'.format(output_folder)

    # Get tile information from RunInfo.xml
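    # On NovaSeq runs each lane's tiles are split into num_slice_NovaSeq (or
    # num_slice_NovaSeq_S4) slices so downstream jobs can run per slice; other
    # platforms use a single slice covering every tile in the lane.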
    slice_id = {}
    slice_first_tile = {}
    slice_tile_limit = {}
    for lane in lanes_unique:
        tile_nums = get_tiles(runinfo_file, lane)
        tile_cou = len(tile_nums)
        if ((not is_NovaSeq) and (not is_NovaSeq_S4)):
            slice_id[lane] = ['0']
            slice_first_tile[lane] = [str(tile_nums[0])]
            slice_tile_limit[lane] = [str(tile_cou)]
        else:
            slice_cou = num_slice_NovaSeq if is_NovaSeq else num_slice_NovaSeq_S4
            tile_cou_per_slice = (tile_cou // slice_cou) + 1
            slice_id[lane] = []
            slice_first_tile[lane] = []
            slice_tile_limit[lane] = []
            for i in range(slice_cou):
                if tile_cou_per_slice * i >= tile_cou:
                    break
                slice_id[lane].append(str(i))
                slice_first_tile[lane].append(
                    str(tile_nums[tile_cou_per_slice * i]))
                slice_tile_limit[lane].append(str(tile_cou_per_slice))

    analysis_folder = '{}/{}_{}'.format(library_folder, experiment_date,
                                        library)
    alignment_folder = '{}/{}/alignment/'.format(analysis_folder, reference2)
    barcode_matching_folder = '{}/{}/barcode_matching/'.format(
        analysis_folder, reference2)
    select_cell_file = alignment_folder + library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.txt'
    bead_barcode_file = '{}/BeadBarcodes_degenerate.txt'.format(
        analysis_folder)

    if not os.path.isfile(select_cell_file):
        write_log(
            log_file, flowcell_barcode, 'run_cmatcher_combine error: ' +
            select_cell_file + ' does not exist!')
        raise Exception('run_cmatcher_combine error: ' + select_cell_file +
                        ' does not exist!')

    folder_running = '{}/status/running.cmatcher_combine_{}_{}'.format(
        output_folder, library, reference2)
    folder_finished = '{}/status/finished.cmatcher_combine_{}_{}'.format(
        output_folder, library, reference2)
    folder_failed = '{}/status/failed.cmatcher_combine_{}_{}'.format(
        output_folder, library, reference2)

    try:
        call(['mkdir', '-p', folder_running])

        l = 0
        with open(select_cell_file, 'r') as fin:
            for line in fin:
                l += 1
        fin.close()
        k = 10000
        ls = l // k

        print('# selected cells: ' + str(l))
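        # The selected-cell list was split into chunks of k = 10,000 barcodes
        # (presumably by the earlier cmatcher step); poll every 30 seconds until
        # every chunk's .finished flag exists, for both the real and the shuffled
        # matching runs.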

        while True:
            f = True
            for i in range(ls + 1):
                if i * k >= l:
                    break
                file2 = '{}/{}_barcode_matching_{}.finished'.format(
                    barcode_matching_folder, library, str(i + 1))
                if not os.path.isfile(file2):
                    f = False
                    break
            if f:
                break
            time.sleep(30)

        while True:
            f = True
            for i in range(ls + 1):
                if i * k >= l:
                    break
                file2 = '{}/{}_barcode_matching_shuffled_{}.finished'.format(
                    barcode_matching_folder, library, str(i + 1))
                if not os.path.isfile(file2):
                    f = False
                    break
            if f:
                break
            time.sleep(30)

        print('combine cmatcher outputs...')
        write_log(log_file, flowcell_barcode,
                  "Combine CMatcher outputs for " + library + " " + reference2)
        combined_cmatcher_file = '{}/{}_barcode_matching.txt'.format(
            barcode_matching_folder, library)
        with open(combined_cmatcher_file, 'w') as fout:
            fout.write(
                'IlluminaBarcodes\tProcessedIlluminaBarcodes\tBeadBarcodes\tDistance\tX\tY\n'
            )
            for i in range(ls + 1):
                if i * k >= l:
                    break
                file2 = '{}/{}_barcode_matching_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                with open(file2, 'r') as fin:
                    j = 0
                    for line in fin:
                        j += 1
                        if j > 1:
                            fout.write(line)
                fin.close()
        fout.close()

        # Combine CMatcher logs
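        # Each chunk's .txt.log is expected to hold three "label: value" lines
        # (total, uniquely matched and multiply matched barcodes); sum them into a
        # single summary with percentages.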
        combined_cmatcher_summary = '{}/{}_barcode_matching_summary.txt'.format(
            barcode_matching_folder, library)
        total = 0
        unique = 0
        multi = 0
        for i in range(ls + 1):
            if i * k >= l:
                break
            file2 = '{}/{}_barcode_matching_{}.txt.log'.format(
                barcode_matching_folder, library, str(i + 1))
            if not os.path.isfile(file2):
                continue
            j = 0
            with open(file2, 'r') as fin:
                for line in fin:
                    j += 1
                    s = line.split(':')[1]
                    s = s.strip(' \t\n')
                    if j == 1:
                        total += int(s)
                    elif j == 2:
                        unique += int(s)
                    elif j == 3:
                        multi += int(s)
            fin.close()
        with open(combined_cmatcher_summary, 'w') as fout:
            fout.write('Total # barcodes: {}\n'.format(str(total)))
            fout.write('# unique matched barcodes: {}, {}%\n'.format(
                str(unique), str(unique * 100 / total)))
            fout.write('# multiple matched barcodes: {}, {}%\n'.format(
                str(multi), str(multi * 100 / total)))
        fout.close()

        for i in range(ls + 1):
            if i * k >= l:
                break
            file1 = '{}/{}_barcode_matching_{}.txt'.format(
                barcode_matching_folder, library, str(i + 1))
            file2 = '{}/{}_barcode_matching_{}.finished'.format(
                barcode_matching_folder, library, str(i + 1))
            name = library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells'
            file3 = '{}/{}_{}.txt'.format(alignment_folder, name, str(i + 1))
            file4 = '{}/{}_barcode_matching_{}.txt.log'.format(
                barcode_matching_folder, library, str(i + 1))
            if os.path.isfile(file1):
                call(['rm', file1])
            if os.path.isfile(file2):
                call(['rm', file2])
            if os.path.isfile(file3):
                call(['rm', file3])
            if os.path.isfile(file4):
                call(['rm', file4])

        combined_cmatcher_file2 = '{}/{}_barcode_matching_distance.txt'.format(
            barcode_matching_folder, library)
        with open(combined_cmatcher_file2, 'w') as fout:
            fout.write(
                'IlluminaBarcodes\tProcessedIlluminaBarcodes\tBeadBarcodes\tDistance\tX\tY\n'
            )
            for i in range(ls + 1):
                if i * k >= l:
                    break
                file2 = '{}/{}_barcode_matching_distance_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                with open(file2, 'r') as fin:
                    j = 0
                    for line in fin:
                        j += 1
                        if j > 1:
                            fout.write(line)
                fin.close()
                call(['rm', file2])
        fout.close()

        # UniqueMappedIlluminaBarcodes
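        # De-duplicate column 1 of the combined matching table (the processed
        # Illumina barcode), write the unique list, and gzip it for downstream use.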
        bci = np.loadtxt(combined_cmatcher_file,
                         delimiter='\t',
                         dtype='str',
                         skiprows=1,
                         usecols=(1))
        bci = np.unique(bci)
        unique_bci_file = '{}/{}_unique_matched_illumina_barcodes.txt'.format(
            barcode_matching_folder, library)
        with open(unique_bci_file, 'w') as f1:
            for bc in bci:
                f1.write("%s\n" % bc)
        f1.close()

        os.system('gzip -c ' + unique_bci_file + ' > ' + unique_bci_file +
                  '.gz')

        write_log(
            log_file, flowcell_barcode, "Combine CMatcher outputs for " +
            library + " " + reference2 + " is done. ")

        # Get unique matched bead barcodes and locations
        print('Get unique matched bead barcodes and locations...')
        write_log(
            log_file, flowcell_barcode,
            "Get unique matched bead barcodes and locations for " + library +
            " " + reference2)
        seen_bead_barcodes = {}
        matched_bead_barcode_file = '{}/{}_matched_bead_barcodes.txt'.format(
            barcode_matching_folder, library)
        matched_bead_location_file = '{}/{}_matched_bead_locations.txt'.format(
            barcode_matching_folder, library)
        bead_location_forR = '{}/BeadLocationsForR.csv'.format(
            barcode_matching_folder)
        with open(matched_bead_barcode_file, 'w') as fout1:
            with open(matched_bead_location_file, 'w') as fout2:
                with open(bead_location_forR, 'w') as fout3:
                    fout3.write('barcodes,xcoord,ycoord\n')
                    with open(combined_cmatcher_file, 'r') as fin:
                        j = 0
                        for line in fin:
                            j += 1
                            if j > 1:
                                bc = line.split('\t')[2]
                                dist = line.split('\t')[3]
                                x = line.split('\t')[4]
                                y = line.split('\t')[5]
                                if bc not in seen_bead_barcodes:
                                    fout1.write(bc + '\n')
                                    fout2.write(dist + '\t' + x + '\t' + y)
                                    fout3.write(bc + ',' + x + ',' + y)
                                    seen_bead_barcodes[bc] = 1
                    fin.close()
                fout3.close()
            fout2.close()
        fout1.close()

        write_log(
            log_file, flowcell_barcode,
            "Get unique matched bead barcodes and locations for " + library +
            " " + reference2 + " is done. ")

        combined_cmatcher_file = '{}/{}_barcode_matching_shuffled.txt'.format(
            barcode_matching_folder, library)
        with open(combined_cmatcher_file, 'w') as fout:
            fout.write(
                'IlluminaBarcodes\tProcessedIlluminaBarcodes\tBeadBarcodes\tDistance\tX\tY\n'
            )
            for i in range(ls + 1):
                if i * k >= l:
                    break
                file2 = '{}/{}_barcode_matching_shuffled_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                with open(file2, 'r') as fin:
                    j = 0
                    for line in fin:
                        j += 1
                        if j > 1:
                            fout.write(line)
                fin.close()
        fout.close()

        for i in range(ls + 1):
            if i * k >= l:
                break
            file1 = '{}/{}_barcode_matching_shuffled_{}.txt'.format(
                barcode_matching_folder, library, str(i + 1))
            file2 = '{}/{}_barcode_matching_shuffled_{}.finished'.format(
                barcode_matching_folder, library, str(i + 1))
            name = library + '.' + min_transcripts_per_cell + '_transcripts_mq_' + base_quality + '_selected_cells.shuffled'
            file3 = '{}/{}_{}.txt'.format(alignment_folder, name, str(i + 1))
            file4 = '{}/{}_barcode_matching_shuffled_{}.txt.log'.format(
                barcode_matching_folder, library, str(i + 1))
            if os.path.isfile(file1):
                call(['rm', file1])
            if os.path.isfile(file2):
                call(['rm', file2])
            if os.path.isfile(file3):
                call(['rm', file3])
            if os.path.isfile(file4):
                call(['rm', file4])

        combined_cmatcher_file2 = '{}/{}_barcode_matching_distance_shuffled.txt'.format(
            barcode_matching_folder, library)
        with open(combined_cmatcher_file2, 'w') as fout:
            fout.write(
                'IlluminaBarcodes\tProcessedIlluminaBarcodes\tBeadBarcodes\tDistance\tX\tY\n'
            )
            for i in range(ls + 1):
                if i * k >= l:
                    break
                file2 = '{}/{}_barcode_matching_distance_shuffled_{}.txt'.format(
                    barcode_matching_folder, library, str(i + 1))
                with open(file2, 'r') as fin:
                    j = 0
                    for line in fin:
                        j += 1
                        if j > 1:
                            fout.write(line)
                fin.close()
                call(['rm', file2])
        fout.close()

        # UniqueMappedIlluminaBarcodes
        bci = np.loadtxt(combined_cmatcher_file,
                         delimiter='\t',
                         dtype='str',
                         skiprows=1,
                         usecols=(1))
        bci = np.unique(bci)
        shuffled_bci_file = '{}/{}_unique_shuffled_illumina_barcodes.txt'.format(
            barcode_matching_folder, library)
        with open(shuffled_bci_file, 'w') as f1:
            for bc in bci:
                f1.write("%s\n" % bc)
        f1.close()

        os.system('gzip -c ' + shuffled_bci_file + ' > ' + shuffled_bci_file +
                  '.gz')
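        # For every lane/slice of this library, submit a tag_matched_bam job (and,
        # when gen_read1_plot is set, a filter_unmapped_bam job); these scripts
        # presumably rewrite the per-slice BAMs using the matched bead barcodes.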

        for i in range(len(lanes)):
            if libraries[i] != library:
                continue
            for slice in slice_id[lanes[i]]:
                # Call tag_matched_bam
                output_file = '{}/logs/tag_matched_bam_{}_{}_{}_{}_{}.log'.format(
                    output_folder, library, lanes[i], slice, barcodes[i],
                    reference2)
                submission_script = '{}/tag_matched_bam.sh'.format(
                    scripts_folder)
                call_args = [
                    'qsub', '-o', output_file, '-l', 'h_vmem=5G', '-notify',
                    '-l', 'h_rt=10:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                    'os=RedHat7', submission_script, manifest_file, library,
                    lanes[i], slice, barcodes[i], locus_function_list,
                    scripts_folder, output_folder, analysis_folder
                ]
                call_to_taskrunner(output_folder, call_args)

                # Call filter_unmapped_bam
                if gen_read1_plot:
                    output_file = '{}/logs/filter_unmapped_bam_{}_{}_{}_{}_{}.log'.format(
                        output_folder, library, lanes[i], slice, barcodes[i],
                        reference2)
                    submission_script = '{}/filter_unmapped_bam.sh'.format(
                        scripts_folder)
                    call_args = [
                        'qsub', '-o', output_file, '-l', 'h_vmem=10G',
                        '-notify', '-l', 'h_rt=10:0:0', '-j', 'y', '-P',
                        'macosko_lab', '-l', 'os=RedHat7', submission_script,
                        manifest_file, library, lanes[i], slice, barcodes[i],
                        locus_function_list, scripts_folder, output_folder,
                        analysis_folder
                    ]
                    call_to_taskrunner(output_folder, call_args)

        # Call generate_plots_cmatcher
        output_file = '{}/logs/generate_plots_cmatcher_{}_{}.log'.format(
            output_folder, library, reference2)
        submission_script = '{}/generate_plots_cmatcher.sh'.format(
            scripts_folder)
        call_args = [
            'qsub', '-o', output_file, '-l', 'h_vmem=30G', '-notify', '-l',
            'h_rt=40:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7',
            submission_script, manifest_file, library, scripts_folder,
            locus_function_list, output_folder,
            '{}/{}'.format(analysis_folder, reference2)
        ]
        call_to_taskrunner(output_folder, call_args)

        call(['mv', folder_running, folder_finished])
    except Exception as exp:
        print("EXCEPTION:!")
        print(exp)
        traceback.print_tb(exp.__traceback__, file=sys.stdout)
        if os.path.isdir(folder_running):
            call(['mv', folder_running, folder_failed])
        else:
            call(['mkdir', '-p', folder_failed])

        if len(email_address) > 1:
            subject = "Slide-seq workflow failed for " + flowcell_barcode
            content = "The Slide-seq workflow for " + library + " " + reference2 + " failed at the step of running cmatcher combine. Please check the log file for the issues. "
            call_args = [
                'python', '{}/send_email.py'.format(scripts_folder),
                email_address, subject, content
            ]
            call(call_args)

        sys.exit()
Example #8
def main():
    if len(sys.argv) != 2:
        print("Please provide one argument: manifest file!")
        sys.exit()

    manifest_file = sys.argv[1]

    # Check if the manifest file exists
    if not os.path.isfile(manifest_file):
        print("File {} does not exist. Exiting...".format(manifest_file))
        sys.exit()

    # Read manifest file
    options = {}
    with open(manifest_file, "r") as fp:
        for line in fp:
            parts = line.rstrip().split("=")
            options[parts[0]] = parts[1]
    fp.close()

    flowcell_directory = options['flowcell_directory']
    output_folder = options['output_folder']
    metadata_file = options['metadata_file']
    flowcell_barcode = options['flowcell_barcode']

    library_folder = options[
        'library_folder'] if 'library_folder' in options else '{}/libraries'.format(
            output_folder)
    tmpdir = options[
        'temp_folder'] if 'temp_folder' in options else '{}/tmp'.format(
            output_folder)
    dropseq_folder = options[
        'dropseq_folder'] if 'dropseq_folder' in options else '/broad/macosko/bin/dropseq-tools'
    picard_folder = options[
        'picard_folder'] if 'picard_folder' in options else '/broad/macosko/bin/dropseq-tools/3rdParty/picard'
    scripts_folder = options[
        'scripts_folder'] if 'scripts_folder' in options else '/broad/macosko/jilong/slideseq_pipeline/scripts'
    is_NovaSeq = str2bool(
        options['is_NovaSeq']) if 'is_NovaSeq' in options else False
    is_NovaSeq_S4 = str2bool(
        options['is_NovaSeq_S4']) if 'is_NovaSeq_S4' in options else False
    num_slice_NovaSeq = int(
        options['num_slice_NovaSeq']) if 'num_slice_NovaSeq' in options else 10
    num_slice_NovaSeq_S4 = int(options['num_slice_NovaSeq_S4']
                               ) if 'num_slice_NovaSeq_S4' in options else 40
    email_address = options[
        'email_address'] if 'email_address' in options else ''

    basecalls_dir = '{}/Data/Intensities/BaseCalls'.format(flowcell_directory)
    log_file = '{}/logs/workflow.log'.format(output_folder)

    # Get read structure from RunInfo.xml
    runinfo_file = '{}/RunInfo.xml'.format(flowcell_directory)
    read_structure = get_read_structure(runinfo_file)

    # Parse metadata file
    write_log(log_file, flowcell_barcode, "Parse metadata file. ")
    commandStr = 'python ' + scripts_folder + '/parse_metadata.py -i ' + metadata_file + ' -r ' + runinfo_file + ' -o ' + '{}/parsed_metadata.txt'.format(
        output_folder)
    os.system(commandStr)

    # Read info from metadata file
    lanes = []
    lanes_unique = []
    libraries = []
    libraries_unique = []
    barcodes = []
    bead_structures = []
    references_unique = []
    locus_function_list_unique = []
    with open('{}/parsed_metadata.txt'.format(output_folder), 'r') as fin:
        reader = csv.reader(fin, delimiter='\t')
        rows = list(reader)
        row0 = rows[0]
        for i in range(1, len(rows)):
            row = rows[i]
            lanes.append(row[row0.index('lane')])
            if row[row0.index('lane')] not in lanes_unique:
                lanes_unique.append(row[row0.index('lane')])
            libraries.append(row[row0.index('library')])
            if row[row0.index('library')] not in libraries_unique:
                libraries_unique.append(row[row0.index('library')])
                references_unique.append(row[row0.index('reference')])
                locus_function_list_unique.append(
                    row[row0.index('locus_function_list')])
            barcodes.append(row[row0.index('sample_barcode')])
            bead_structures.append(row[row0.index('bead_structure')])
    fin.close()

    # Get tile information from RunInfo.xml
    slice_id = {}
    slice_first_tile = {}
    slice_tile_limit = {}
    for lane in lanes_unique:
        tile_nums = get_tiles(runinfo_file, lane)
        tile_cou = len(tile_nums)
        if ((not is_NovaSeq) and (not is_NovaSeq_S4)):
            slice_id[lane] = ['0']
            slice_first_tile[lane] = [str(tile_nums[0])]
            slice_tile_limit[lane] = [str(tile_cou)]
        else:
            slice_cou = num_slice_NovaSeq if is_NovaSeq else num_slice_NovaSeq_S4
            tile_cou_per_slice = (tile_cou // slice_cou) + 1
            slice_id[lane] = []
            slice_first_tile[lane] = []
            slice_tile_limit[lane] = []
            for i in range(slice_cou):
                if (tile_cou_per_slice * i >= tile_cou):
                    break
                slice_id[lane].append(str(i))
                slice_first_tile[lane].append(
                    str(tile_nums[tile_cou_per_slice * i]))
                slice_tile_limit[lane].append(str(tile_cou_per_slice))

    folder_running = '{}/status/running.run_preparation'.format(output_folder)
    folder_finished = '{}/status/finished.run_preparation'.format(
        output_folder)
    folder_failed = '{}/status/failed.run_preparation'.format(output_folder)

    try:
        call(['mkdir', '-p', folder_running])

        # Check if the input Illumina folder is in correct format
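        # Picard CheckIlluminaDirectory validates the basecall folder against the
        # read structure; LINK_LOCS=false is passed on NovaSeq runs, which use a
        # single run-level s.locs file, so per-tile locs links are not created.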
        commandStr = 'java -Djava.io.tmpdir=' + tmpdir + ' -XX:+UseParallelOldGC -XX:ParallelGCThreads=1 -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xmx8192m '
        commandStr += '-jar ' + picard_folder + '/picard.jar CheckIlluminaDirectory TMP_DIR=' + tmpdir + ' VALIDATION_STRINGENCY=SILENT '
        commandStr += 'BASECALLS_DIR=' + basecalls_dir + ' READ_STRUCTURE=' + read_structure
        if is_NovaSeq or is_NovaSeq_S4:
            commandStr += ' LINK_LOCS=false'
        for lane in lanes_unique:
            commandStr += ' L=' + lane
        write_log(log_file, flowcell_barcode,
                  "CheckIlluminaDirectory Command=" + commandStr)
        os.system(commandStr)
        write_log(log_file, flowcell_barcode,
                  "CheckIlluminaDirectory is done. ")

        # Create directories
        write_log(log_file, flowcell_barcode, "Creating directories. ")
        for lane in lanes_unique:
            call(['mkdir', '-p', '{}/{}'.format(output_folder, lane)])
            call(['mkdir', '-p', '{}/{}/barcodes'.format(output_folder, lane)])
            for slice in slice_id[lane]:
                call([
                    'mkdir', '-p', '{}/{}/{}'.format(output_folder, lane,
                                                     slice)
                ])
        for i in range(len(lanes)):
            for slice in slice_id[lanes[i]]:
                if not os.path.isdir('{}/{}/{}/{}'.format(
                        output_folder, lanes[i], slice, libraries[i])):
                    call([
                        'mkdir', '-p',
                        '{}/{}/{}/{}'.format(output_folder, lanes[i], slice,
                                             libraries[i])
                    ])
                if (barcodes[i]):
                    call([
                        'mkdir', '-p',
                        '{}/{}/{}/{}/{}'.format(output_folder, lanes[i], slice,
                                                libraries[i], barcodes[i])
                    ])

        # Generate barcode_params.txt that is needed by ExtractIlluminaBarcodes
        for lane in lanes_unique:
            write_log(log_file, flowcell_barcode,
                      "Generating barcode_params.txt for Lane " + lane)
            commandStr = 'python ' + scripts_folder + '/gen_barcode_params.py -i ' + output_folder + '/parsed_metadata.txt -o ' + output_folder + '/' + lane + '/barcode_params.txt -l ' + lane
            os.system(commandStr)

        # Generate library_params that is needed by IlluminaBasecallsToSam
        for lane in lanes_unique:
            write_log(log_file, flowcell_barcode,
                      "Generating library_params.txt for Lane " + lane)
            for slice in slice_id[lane]:
                commandStr = 'python ' + scripts_folder + '/gen_library_params.py -i ' + output_folder + '/parsed_metadata.txt -o ' + output_folder + '/' + lane + '/' + slice + '/library_params.txt -b '
                commandStr += output_folder + '/' + lane + '/' + slice + '/ -n ' + flowcell_barcode + '.' + lane + '.' + slice + ' -l ' + lane
                os.system(commandStr)

        # Call run_processbarcodes
        for lane in lanes_unique:
            output_file = '{}/logs/run_processbarcodes_lane_{}.log'.format(
                output_folder, lane)
            submission_script = '{}/run_processbarcodes.sh'.format(
                scripts_folder)
            call_args = [
                'qsub', '-o', output_file, '-l', 'h_vmem=70g', '-notify', '-l',
                'h_rt=06:0:0', '-j', 'y', '-P', 'macosko_lab', '-l',
                'os=RedHat7', submission_script, manifest_file, lane,
                scripts_folder, output_folder,
                '{}/{}'.format(output_folder, lane)
            ]
            call_to_taskrunner(output_folder, call_args)

        # Call run_mergebarcodes
        output_file = '{}/logs/run_mergebarcodes.log'.format(output_folder)
        submission_script = '{}/run_mergebarcodes.sh'.format(scripts_folder)
        call_args = [
            'qsub', '-o', output_file, '-l', 'h_vmem=5G', '-notify', '-l',
            'h_rt=100:0:0', '-j', 'y', '-P', 'macosko_lab', '-l', 'os=RedHat7',
            submission_script, manifest_file, scripts_folder, output_folder
        ]
        call_to_taskrunner(output_folder, call_args)

        call(['mv', folder_running, folder_finished])
    except Exception as exp:
        print("EXCEPTION:!")
        print(exp)
        traceback.print_tb(exp.__traceback__, file=sys.stdout)
        if os.path.isdir(folder_running):
            call(['mv', folder_running, folder_failed])
        else:
            call(['mkdir', '-p', folder_failed])

        if len(email_address) > 1:
            subject = "Slide-seq workflow failed for " + flowcell_barcode
            content = "The Slide-seq workflow failed at the step of preparation. Please check the log file for the issues. "
            call_args = [
                'python', '{}/send_email.py'.format(scripts_folder),
                email_address, subject, content
            ]
            call(call_args)

        sys.exit()