Example #1
0
def main():
    """Entry point: fetch input FASTQs, run Kraken, push results to S3, clean up.

    Parses CLI args (S3 input paths, extra Kraken args, working dir and
    results path), then drives the download -> run -> upload -> cleanup
    pipeline via the module's helper functions.
    """
    parser = ArgumentParser()

    paths = parser.add_argument_group(title='File paths')
    paths.add_argument('--input_files', type=str, required=True,
                       help='S3 paths for input files')

    run_opts = parser.add_argument_group(title='Run command args')
    run_opts.add_argument('--cmd_args', type=str, default=' ',
                          help='Arguments for preprocessing')

    parser.add_argument('--working_dir', type=str, default='/scratch')
    parser.add_argument('--results_path', type=str, default='/scratch')

    args = parser.parse_args()

    # Scratch directory for this run; removed again at the end.
    scratch_dir = generate_working_dir(args.working_dir)

    # Download fastq files and reference files
    print('Downloading FASTQs')
    fastq_files = generate_input_string(args.input_files, scratch_dir)

    print('Running Kraken')
    kraken_folder = run_kraken(args.cmd_args, fastq_files, scratch_dir)

    print('Uploading results to %s' % args.results_path)
    upload_folder(args.results_path, kraken_folder)
    print('Cleaning up working dir')
    delete_working_dir(scratch_dir)
    print('Completed')
def main():
    """Entry point: run vcf2tiledb for one array-job shard and upload results.

    Determines the shard index (from --index or the AWS Batch array index),
    downloads the loader/callset/vid config files, prepares the working and
    partial-VCF directories, runs vcf2tiledb, and uploads the output unless
    SKIP_UPLOAD is set.
    """
    argparser = parse4vcf2tiledb()
    args, extr = argparser.parse_known_args()
    print(args)

    # Fall back to the AWS Batch array index when --index is not given.
    # NOTE(review): int(None) raises TypeError if AWS_BATCH_JOB_ARRAY_INDEX
    # is unset — assumed to always be present for array jobs; confirm.
    if args.index is None:
        idx = int(os.getenv('AWS_BATCH_JOB_ARRAY_INDEX'))
    else:
        idx = args.index

    loader_path, callset_path, vid_path = download_required_files(
        args.loader_s3_path, args.callset_s3_path, args.vid_s3_path)

    # Provision EBS-backed scratch space when requested; otherwise just
    # make sure the working directory exists.
    if os.getenv('GETEBS'):
        initEBS(WORKDIR)
    elif not os.path.exists(WORKDIR):
        os.mkdir(WORKDIR)

    if not os.path.exists(PVCFDIR):
        os.mkdir(PVCFDIR)

    # Run program
    run_vcf2tiledb_no_s3(WORKDIR, idx, loader_path, callset_path, vid_path, args.chr)

    if not os.getenv('SKIP_UPLOAD'):
        print("Uploading to %s" % (args.results_s3_path))
        upload_folder(args.results_s3_path, PVCFDIR)

    print("Completed vcf2tiledb")
Example #3
0
def main():
    """Entry point: download BAM and reference from S3, run Strelka, upload VCF.

    Parses CLI args for the input/output S3 paths and run options, stages the
    inputs into a scratch working directory, runs Strelka, uploads the
    resulting VCF folder, and deletes the working directory.
    """
    argparser = ArgumentParser()

    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--bam_s3_path',
                                 type=str,
                                 help='BAM s3 path',
                                 required=True)
    file_path_group.add_argument('--bai_s3_path',
                                 type=str,
                                 help='BAM Index s3 path',
                                 required=True)
    file_path_group.add_argument('--vcf_s3_path',
                                 type=str,
                                 help='VCF s3 path',
                                 required=True)
    file_path_group.add_argument('--reference_s3_path',
                                 type=str,
                                 help='Reference file s3 path',
                                 required=True)
    file_path_group.add_argument('--reference_index_s3_path',
                                 type=str,
                                 help='Reference file index s3 path',
                                 required=True)

    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--memory',
                           type=str,
                           help='Memory (in GB) for strelka to use',
                           # String to match type=str: argparse does not apply
                           # `type` to defaults, so a bare 28 made args.memory
                           # an int only when the flag was omitted.
                           default='28')
    run_group.add_argument('--cmd_args',
                           type=str,
                           help='Additional arguments for strelka',
                           default='')

    argparser.add_argument('--working_dir', type=str, default='/scratch')

    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    print("Downloading bam")
    local_bam_path = download_file(args.bam_s3_path, working_dir)
    local_bai_path = download_file(args.bai_s3_path, working_dir)
    print("BAM and index downloaded to %s and %s" %
          (local_bam_path, local_bai_path))
    print("Downloading reference")
    local_reference_path = download_file(args.reference_s3_path, working_dir)
    local_reference_index_path = download_file(args.reference_index_s3_path,
                                               working_dir)
    print("Reference downloaded to %s. Index to %s" %
          (local_reference_path, local_reference_index_path))
    print("Running Strelka")
    local_vcf_path = run_strelka(local_bam_path, local_reference_path,
                                 args.memory, args.cmd_args, working_dir)
    print("Uploading %s to %s" % (local_vcf_path, args.vcf_s3_path))
    upload_folder(args.vcf_s3_path, local_vcf_path)
    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print("Completed")
Example #4
0
def upload_bam(bam_s3_path, local_folder_path):
    """
    Upload the folder holding the BAM file (and any associated output) to S3.

    Thin wrapper around ``upload_folder`` kept for a descriptive name.
    :param bam_s3_path: S3 path to upload the alignment results to
    :param local_folder_path: local path containing the alignment results
    """
    upload_folder(bam_s3_path, local_folder_path)
Example #5
0
def upload_results(folder_s3_path, local_folder_path):
    """
    Upload the assembly results folder (and any associated output) to S3.

    Thin wrapper around ``upload_folder`` kept for a descriptive name.
    :param folder_s3_path: S3 path to upload the assembly results to
    :param local_folder_path: local path containing the assembly results
    """
    upload_folder(folder_s3_path, local_folder_path)
Example #6
0
def upload_results(results_path, working_dir):
    """
    Uploads the 'preqc' and 'fragsizes' result folders to S3
    :param results_path: S3 path to upload the results to
    :param working_dir: local working directory containing the result folders
    """

    upload_folder(results_path, os.path.join(working_dir, 'preqc'))
    upload_folder(results_path, os.path.join(working_dir, 'fragsizes'))
Example #7
0
def main():
    """Entry point: stage inputs from S3, run xatlas for one sample, upload output.

    Derives index-file paths from the reference and sample paths, prepares the
    working directory (optionally on EBS), downloads everything required,
    runs xatlas, and uploads the results folder unless SKIP_UPLOAD is set.
    """
    argparser = parse_args()
    args, extr = argparser.parse_known_args()
    print(args)

    ref_idx = args.ref_s3_path + '.fai'

    # Pick the matching index extension from the input's file extension.
    extension = args.sample_s3_path.rpartition('.')[-1]
    if extension == 'bam':
        input_idx = args.sample_s3_path + '.bai'
    elif extension == 'cram':
        input_idx = args.sample_s3_path + '.crai'
    else:
        raise Exception("Unknown input type")

    # Provision EBS-backed scratch space when requested; otherwise just
    # make sure the working directory exists.
    if os.getenv('GETEBS'):
        initEBS(WORKDIR)
    elif not os.path.exists(WORKDIR):
        os.mkdir(WORKDIR)

    (ref_path, ref_idx_path, input_path, idx_path,
     region_path) = download_required_files(WORKDIR, args.ref_s3_path, ref_idx,
                                            args.sample_s3_path, input_idx,
                                            args.regions)

    output_dir = WORKDIR + '/results'
    os.mkdir(output_dir)
    output_prefix = output_dir + '/{SAMPLE}.hg38.realign.bqsr'.format(
        SAMPLE=args.sample_name)

    # Run program
    run_xatlas_basic(args.sample_name, input_path, ref_path, args.threads,
                     region_path, output_prefix, output_dir)

    if not os.getenv('SKIP_UPLOAD'):
        print("Uploading to %s" % (args.results_s3_path))
        upload_folder(args.results_s3_path, output_dir)

    print("Completed xatlas for %s" % args.sample_name)
Example #8
0
# Sanity check: confirm the reference transcriptome directory is present
# before launching Cell Ranger (check_call raises on a non-zero exit).
cmd = "ls -l refdata-cellranger-GRCh38-1.2.0"
subprocess.check_call(shlex.split(cmd))

# Run Cellranger MKFASTQ and COUNT
############################################

# cellranger mkfastq: demultiplex the tiny-bcl test flowcell into FASTQs
# using the bundled sample sheet; writes to ./tiny-bcl-output.
cmd = 'cellranger mkfastq --id=tiny-bcl-output --run=tiny-bcl/cellranger-tiny-bcl-1.2.0/ --csv=tiny-bcl/cellranger-tiny-bcl-samplesheet-1.2.0.csv'
subprocess.check_call(shlex.split(cmd))

#
# use full path for reference transcriptome
#

# cellranger count: quantify gene expression for test_sample from the FASTQs
# produced above (SC3Pv2 chemistry, ~1000 expected cells, 3 GB local memory).
cmd = 'cellranger count --id=test_sample --fastqs=tiny-bcl-output/outs/fastq_path/p1/s1 --sample=test_sample --expect-cells=1000 --localmem=3 --chemistry=SC3Pv2 --transcriptome=refdata-cellranger-GRCh38-1.2.0'
subprocess.check_call(shlex.split(cmd))

# Copy data back to S3
###########################

# copy mkfastq outputs to s3://<inst_bucket>/tiny-bcl-output
s3_path = 's3://' + inst_bucket + '/tiny-bcl-output'
fcs_files_path = 'tiny-bcl-output'
upload_folder(s3_path, fcs_files_path)

# copy count outputs to s3://<inst_bucket>/test_sample
s3_path = 's3://' + inst_bucket + '/test_sample'
fcs_files_path = 'test_sample'
upload_folder(s3_path, fcs_files_path)