Example #1
def QNATool(cmd, project_name, source_dir, output_dir):
    # Make sure path variables are valid
    if not _valid_path(source_dir) or not _valid_path(output_dir):
        sys.exit(1)

    # Set config.py settings,
    # unless they have been already set.
    if config.project_name == '':
        config.project_name = project_name

    if config.source_dir == '':
        config.source_dir = source_dir

    if config.output_dir == '':
        project_dir = os.path.join(output_dir, project_name)
        if not os.path.exists(project_dir):
            os.mkdir(project_dir)
        config.output_dir = project_dir

    if config.chart_dir == '':
        chart_dir = os.path.join(config.output_dir, 'charts')
        if not os.path.exists(chart_dir):
            os.mkdir(chart_dir)
        config.chart_dir = chart_dir

    if config.DB == '':
        config.DB = os.path.join(config.output_dir, '{}.db'.format(project_name))

    # Run the pipeline
    pipeline.run()
def filterAlignments(infile, outfile):
    '''
    filter alignments to retain only those that
    have > 99% identity to the reference
    '''
    to_cluster = True
    statement = '''delta-filter -q -i 99 %(infile)s > %(outfile)s'''
    P.run()
def buildAlignmentCoordinates(infile, outfile):
    '''
    build coordinates file from alignment delta
    file
    '''
    to_cluster = True
    statement = '''show-coords -T -r %(infile)s > %(outfile)s'''
    P.run()
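A minimal sketch of the frame-inspection pattern these Ruffus/CGAT-style tasks rely on: the task only sets local variables such as statement and to_cluster, and P.run() reads them from the calling frame. This is illustrative only, not the actual CGAT implementation.

import inspect
import subprocess

def run():
    # Pull the caller's local variables, the way CGAT-style P.run() does.
    caller_locals = inspect.currentframe().f_back.f_locals
    # Fill the %(infile)s / %(outfile)s placeholders from those locals
    # and execute the resulting command through the shell.
    statement = caller_locals["statement"] % caller_locals
    subprocess.run(statement, shell=True, check=True)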
Example #4
    def test_finalResults(self):
        output_path = path.join(TEST_DIR_PATH, 'output')
        pipeline.run(path.join(TEST_DIR_PATH, 'data'), output_path)

        mocksPath = './test/jsonMocks'
        comparators.assertStackMembersEqual(mocksPath, output_path)
        # The rendering of phrases is inconsistent, so do not compare them for now
        # comparators.assertPhrasesEqual(mocksPath, output_path)
        # TODO: un-hardcode this testcase
        comparators.assertVariablesEqualUnderTestcase(
            mocksPath, output_path, 'dotProduct([1, 2, 3], [4, 5, 6])')
Example #5
    def test_finalResults(self):
        output_path = path.join(TEST_DIR_PATH, 'output')
        pipeline.run(path.join(TEST_DIR_PATH, 'data'), output_path)

        mocksPath = './test/jsonMocks'
        comparators.assertStackMembersEqual(mocksPath, output_path)
        # The rendering of phrases is inconsistent, so do not compare them for now
        # comparators.assertPhrasesEqual(mocksPath, output_path)
        # TODO: un-hardcode this testcase
        comparators.assertVariablesEqualUnderTestcase(
            mocksPath, output_path, 'dotProduct([1, 2, 3], [4, 5, 6])')
def alignContigsToReference(outfile, param1, param2):
    '''
    align the contigs to the reference genomes
    using nucmer
    '''
    to_cluster = True

    reffile, contigfile = param1, param2
    pattern = P.snip(os.path.basename(outfile), ".delta")
    statement = '''nucmer -p %(pattern)s %(reffile)s %(contigfile)s'''
    P.run()
    outf = os.path.basename(outfile)
    statement = '''mv %(outf)s alignment.dir'''
    P.run()
def createAlignmentBedFiles(infile, outfile):
    '''
    create bed files - the intervals are with respect to the
    reference genome.
    Intervals are merged to form a non-redundant alignment set
    '''
    # has to be output from show coords in tab format
    # also have to be sorted for mergeBed
    to_cluster = True
    statement = '''cat %(infile)s
                   | python %(scriptsdir)s/nucmer2bed.py -t bed4 --log=%(outfile)s.log 
                   | mergeBed -i - 
                   | gzip > %(outfile)s'''
    P.run()
Example #8
def detect():
	print(request.files)
	file = request.files['file']
	print(file)

	print(request.form)

	if file and allowed_file(file.filename):
		filename = secure_filename(file.filename)
		image_string = base64.b64encode(file.read())

		mod_names = request.form['my-select'].split(',')

		mn_cleaned = []
		for mn in mod_names:
			if mn in modules:
				mn_cleaned.append(mn)
		
		detect_type = request.form['classifier']

		print(mod_names)
		print(mn_cleaned)
		print(detect_type)

		result = pipeline.run(image_string, mn_cleaned, detect_type)

		return jsonify({'success': True, 'data': result})

	return {'success': False}
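A hypothetical client for the detect() handler above, assuming it is exposed as POST /detect on a local Flask server; the form field names match the keys the handler reads, while the URL and module names are placeholders.

import requests

with open('example.jpg', 'rb') as f:
    resp = requests.post(
        'http://localhost:5000/detect',
        files={'file': f},
        data={'my-select': 'module_a,module_b', 'classifier': 'default'},
    )
print(resp.json())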
Example #9
def run():
    if pipeline.run(app.config["DATA_DIR"]):
        app.streamlit.terminate()
        app.streamlit = subprocess.Popen(streamlit_options)
        return Response()
    else:
        return Response(status=400)
Example #10
def test():
  d = {'resource_prefix': '/damsl/projects',
       'io_dir':          'resource_io',
       'seq_id':          '3', 
       'session_dir':     'dummy_session2',
       'run_dir':         'run_1',
       'template_prefix': '/home/chao/june27/damsl.mddb/templates/',
       'template_dir':    'impsolv',  
       'charmm':          '/damsl/projects/molecules/software/MD/Charmm/c36b1/exec/gnu/charmm',
      }

  local_paths = resources.LocalResource.get_paths()

  ImpSolvPipeline.preprocess(d, local_paths)

  pipeline = ImpSolvPipeline(d)
  pipeline.run()
Example #11
def test():
  d = {'protein_pdb': '/damsl/projects/molecules/data/odorant_gpcr/membrane_bilayer_md/charmm-gui/gpcrm_model_r03_proa.pdb',
       'resource_prefix': '/damsl/projects',
       'io_dir':          'resource_io',
       'session_dir':     'dummy_session',
       'run_dir':         'run_2',
       'template_prefix': '/home/kwong23/assembla/damsl.mddb/templates',  
       #'template_prefix': '/home/nutanong/mddb/templates/',  
       'trj_dir':         '/damsl/projects/molecules/data/Odorant_GPCR/receptor_models/olfr1393/oe_fred_high_gpcrm_model6/bilayer_frames',
       'pdb_fn':          'rosetta_modeller_model6_frame_0.pdb',
       'smiles':          'C(C)(C)(C)C1CCC(=O)CC1',
       'trj_id':          '21',
       't':               '1',
       'model_id':        '7',
       'receptor_id':     '1393',
       'dest_dir':        '/mddb/project/data/lddb_odorant2',
       'chimera':         '/damsl/projects/molecules/software/DD/Chimera/exec/bin/chimera',
       'sphgen_bin':           '/damsl/projects/molecules/software/DD/Dock/Dock/dock6/bin/sphgen',
       'molcharge_bin':           '/damsl/projects/molecules/software/DD/OpenEye/openeye/bin/molcharge',
       'omega2_bin':           '/damsl/projects/molecules/software/DD/OpenEye/openeye/bin/omega2',
       'dock6_bin':           '/damsl/projects/molecules/software/DD/Dock/Dock/dock6/bin/dock6',
       'dbname':          'lddb_odorant2',
       'sphgen':          '/damsl/projects/molecules/software/DD/Dock/Dock/dock6/bin/sphgen',
       'showbox':         '/damsl/projects/molecules/software/DD/Dock/Dock/dock6/bin/showbox',
       'grid':            '/damsl/projects/molecules/software/DD/Dock/Dock/dock6/bin/grid',
       'molcharge':       '/damsl/projects/molecules/software/DD/OpenEye/openeye/bin/molcharge',
       'omega2':          '/damsl/projects/molecules/software/DD/OpenEye/openeye/bin/omega2',
       'dock6':           '/damsl/projects/molecules/software/DD/Dock/Dock/dock6/bin/dock6',
       'sphere_selector': '/damsl/projects/molecules/software/DD/Dock/Dock/dock6/bin/sphere_selector',


      }

  try:
    import resources

    Dock6Pipeline.preprocess(d, resources.LocalResource.get_paths())

    pipeline = Dock6Pipeline(d)
    pipeline.run()
  except:
    pass
Example #12
    def test_pipeline(self):
        pipeline.run(self._address_list_fname, self._postcode_reference_fname,
                     self._destination_fname)

        # Check that the file has been created
        self.assertTrue(os.path.isfile(self._destination_fname))

        # Check that it is a tsv that can be imported
        df = pd.read_csv(self._destination_fname, sep='\t')

        self.assertTrue(isinstance(df, pd.DataFrame))

        # Check the columns are what we expect
        expected_columns = [
            "urn", "Registration Date", "Latitude", "Longitude", "Location",
            "Unnamed: 5", "Postcode", "validated"
        ]

        self.assertTrue(all(df.columns == expected_columns))

        # Check we have the same rows
        self.assertEqual(df.shape[0], 6)
Example #13
def start():
    # create response
    result, errors = run()
    response = make_response(result)
    # filename
    time_file_name = time.strftime("%Y-%m-%d-%H-%M-%S")
    # set headers
    response.headers.set('Content-Type', 'text/xml')
    response.headers.set('Content-Disposition',
                         'attachment',
                         filename='trend-seminar-xml-%s.xml' % time_file_name)
    # return response
    return response
Example #14
def test2():
  d = {'resource_prefix': '/damsl/projects',
       'io_dir':          'resource_io',
       'session_dir':     'dummy_session3',
       'run_dir':         'run_1',
       'template_prefix': '/home/jbw/mddbnew2/damsl.mddb/templates/',
       'template_dir':    'impsolv',
       'dbname':          'mddb_bdslss3',
       'dbhost':          'mddb',
       'dbuser':          '******',
       'dbpass':          '******',
       'aaseq':            'MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFRFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV'
      }

  try:
    import resources
    local_paths = resources.LocalResource.get_paths()
  
    ImpSolvPipeline.preprocess(d, local_paths)
  
    pipeline = ImpSolvPipeline(d)
    pipeline.run()
  except Exception as e:
    print(e)
Example #15
def listen():
    connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    connection.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    connection.bind((HOST, PORT))
    connection.listen(10)
    while True:
        current_connection, address = connection.accept()
        while True:
            data = current_connection.recv(2048)
            if not data:
                break
            if data == "START_PIPELINE":
                experiments = getExperiments()
                if len(experiments) == 0:
                    current_connection.send("No hay experimentos")
                for e in experiments:
                    result = pipeline.run(e)
                    if result is None:
                        current_connection.send("OK")
                    else:
                        current_connection.send(result)
                data = "0"
Example #16
import cv2
import pipeline
#
# img = cv2.imread('./data/test_images/challenge.jpg')
#
# result = pipeline.run(img)
#
# cv2.imshow('result', result)
# cv2.waitKey(0)
#
# cv2.destroyAllWindows()

# video test
cap = cv2.VideoCapture('./data/test_videos/solidYellowLeft.mp4')

while True:
    ok, frame = cap.read()
    if not ok:
        break

    result = pipeline.run(frame)

    cv2.imshow('result', result)
    key = cv2.waitKey(1)  # -1
    if key == ord('x'):
        break

cap.release()
cv2.destroyAllWindows()
#
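If the processed frames should also be saved to disk, a cv2.VideoWriter can be added to the loop above; the output filename and codec here are arbitrary choices.

import cv2
import pipeline

cap = cv2.VideoCapture('./data/test_videos/solidYellowLeft.mp4')
fps = cap.get(cv2.CAP_PROP_FPS) or 25  # some containers report 0 fps
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
writer = cv2.VideoWriter('result.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

while True:
    ok, frame = cap.read()
    if not ok:
        break
    # write each processed frame to the output video
    writer.write(pipeline.run(frame))

writer.release()
cap.release()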
Example #17
def raw():
    f = request.stream
    pipeline.run(f)
    return redirect(url_for('static', filename="post.html"))
def main(argv=None):
    args, pipeline_args = get_args(argv)
    pipeline_args.append('--setup_file={}'.format(SETUP_FILE_PATH))

    pipeline.run(args.bq_dataset_name, args.embeddings_table_name,
                 args.output_dir, pipeline_args)
Example #19
        if logged():
            if database.isFileAllowedFromUser( fileid, session.user ):
                filename = database.getFileFullName( fileid )
                with open( filename, "r" ) as f:
                    return f.read()
            else:
                return get_render().notallowed()
        else:
            raise web.seeother('/')

#-------------------------------------------------------------------------------
class Job:
    def GET( self, jobid ):
        if logged():
            if database.isJobFromUser( jobid, session.user ):
                jobinfo = database.getJobInfo( jobid )
                return get_render().job( jobinfo )
            else:
                return get_render().notallowed()
        else:
            raise web.seeother('/')

#-------------------------------------------------------------------------------
if __name__ == "__main__":
    database.init()
    p = pipeline.run()
    app.run()
    p.terminate()

#-------------------------------------------------------------------------------
Example #20
def multiplexing():
    """Cell free pipeline.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('input_sam',
                        help="Paths of input sorted undeduplicated sam file.")
    parser.add_argument(
        "-n",
        "--name",
        help=
        "Sample name used to name output files. Will be guessed from input sam if not provide.",
        default="")
    parser.add_argument(
        "-u",
        "--umi",
        help=
        "UMI type (prism, thruplex_hv or thruplex) or empty strng if no umis.",
        default="")
    parser.add_argument(
        "-m",
        "--min-family-size",
        help="Minimum family size. Families smaller than this will be filtered",
        type=int,
        default=1)
    parser.add_argument("-l",
                        "--interval",
                        help="Step size to increment downsampling by.",
                        type=int,
                        required=True)
    parser.add_argument(
        "-r",
        "--reference",
        help="Path to reference genome or containing directory.",
        required=True)
    parser.add_argument(
        "-p",
        "--panel",
        help="Path to covermi panel which must contain targets bedfile.",
        required=True)
    parser.add_argument("-o",
                        "--output",
                        help="Path to write output files to.",
                        default=".")
    args = parser.parse_args()

    threads = run(["getconf", "_NPROCESSORS_ONLN"]).stdout.strip()

    if not args.name:
        args.name = args.input_sam.split("/")[-1].split(".")[0]

    args.reference = os.path.abspath(args.reference)
    args.input_sam = os.path.abspath(args.input_sam)
    args.panel = os.path.abspath(args.panel)
    os.chdir(args.output)

    args.reference = (glob.glob(f"{args.reference}/*.fna") +
                      glob.glob(f"{args.reference}/*.fa") +
                      glob.glob(f"{args.reference}/*.fasta") +
                      [args.reference])[0]
    ref_dir = os.path.dirname(args.reference)
    if glob.glob(f"{ref_dir}/*.sa"):
        bwa = "bwa"
    elif glob.glob(f"{ref_dir}/*.0123"):
        bwa = "bwa-mem2"
    else:
        sys.exit("Invalid bwa indexes")
    targets_bedfile = (glob.glob(f"{args.panel}/*.bed") + [None])[0]
    stats = f"{args.name}.stats.json"
    pipe = Pipe()
    output_file = f"{args.name}.multiplexing.tsv"

    namesorted_sam = f"{args.name}.namesorted.sam"
    pipe([
        "samtools", "sort", "-n", "-o", namesorted_sam, "-@", threads,
        args.input_sam
    ])

    with open(output_file, "wt") as f_out:
        writer = csv.writer(f_out)
        writer.writerow([
            "sample", "reads", "mean_depth", "mean_family_size",
            "singleton_rate", "triplicate_plus_rate", "quadruplicate_plus_rate"
        ])

        requested_reads = 0
        returned_reads = 0
        while returned_reads == requested_reads:
            requested_reads += args.interval

            downsampled_sam = f"{args.name}.downsampled.sam"
            cp = pipe([
                "downsample_sam", "--output", downsampled_sam, "--number",
                requested_reads, namesorted_sam
            ],
                      stderr=subprocess.PIPE)
            returned_reads = int(cp.stderr.decode())

            sorted_sam = f"{args.name}.sorted.downsampled.sam"
            pipe([
                "samtools", "sort", "-o", sorted_sam, "-@", threads,
                downsampled_sam
            ])
            os.unlink(downsampled_sam)

            deduplicated_fastq = f"{args.name}.deduplicated.fastq"
            pipe([
                "elduderino", "--output", deduplicated_fastq, "--stats", stats,
                "--min-family-size", args.min_family_size, "--umi", args.umi,
                sorted_sam
            ])
            os.unlink(sorted_sam)

            deduplicated_sam = f"{args.name}.deduplicated.sam"
            with open(deduplicated_sam, "wb") as f:
                pipe(
                    [
                        bwa,
                        "mem",
                        "-t",
                        threads,
                        "-p",  # interleaved paired end fastq
                        "-C",  # Append fastq comment to sam
                        "-Y",  # Soft clip non-primary reads
                        args.reference,
                        deduplicated_fastq
                    ],
                    stdout=f)
            os.unlink(deduplicated_fastq)

            bam = f"{args.name}.bam"
            pipe([
                "samtools", "sort", "-o", bam, "-@", threads, deduplicated_sam
            ])
            os.unlink(deduplicated_sam)

            pipe([
                "covermi_stats", "--panel", args.panel, "--stats", stats, bam
            ])
            os.unlink(bam)

            with open(stats, "rt") as f:
                data = json.load(f)
            os.unlink(stats)
            writer.writerow([
                args.name, returned_reads, data["coverage"]["mean_depth"],
                data["mean_family_size"], data["singleton_rate"],
                data["triplicate_plus_rate"], data["quadruplicate_plus_rate"]
            ])
            f_out.flush()

    os.unlink(namesorted_sam)
    print(pipe.durations, file=sys.stderr, flush=True)
Example #21
def raw():
    f = request.stream
    pipeline.run(f)
    return redirect(url_for('static', filename="post.html"))
Example #22
def start_processing_routine():
    try:
        pipeline.run()
    except Exception as e:
        print(e)
    return "Done"
import cv2
import pipeline

img = cv2.imread('solidWhiteRight.jpg')

result = pipeline.run(img)

cv2.imshow('result', result)
cv2.waitKey(0)
cv2.imwrite('hough_result_line3.png', result)
cv2.destroyAllWindows()

# # video test
# cap = cv2.VideoCapture('./test_videos/solidWhiteRight.mp4')
#
# while True:
#     ok, frame = cap.read()
#     if not ok:
#         break
#
#     result = pipeline.run(frame)
#
#     cv2.imshow('result', result)
#     key = cv2.waitKey(1)  # -1
#     if key == ord('x'):
#         break
#
# cap.release()
# cv2.destroyAllWindows()

Example #24
import tqdm
import pipeline
import yaml
import sys

if len(sys.argv) == 1:
    with open('config.yml') as f:
        config = yaml.load(f)
elif len(sys.argv) == 2:
    with open(sys.argv[1]) as f:
        config = yaml.load(f)
elif len(sys.argv) == 5:
    config = {"city_name": sys.argv[1], "country_name": sys.argv[2], "sample_size": sys.argv[3],
              "target_path": sys.argv[4], "stages": ["matsim.run"]}
else:
    raise RuntimeError("Wrong number of arguments. Please supply either a config file path or <city_name> <country_name> <sample_size> <target_path>.")

if "disable_progress_bar" in config and config["disable_progress_bar"]:
    tqdm.tqdm = pipeline.safe_tqdm

# use only stage names for running
requested_stages = config["stages"][:]
for i in range(len(requested_stages)):
    if isinstance(requested_stages[i], dict):
        requested_stages[i] = list(requested_stages[i].keys())[0]

pipeline.run(
    requested_stages,
    target_path = config["target_path"],
    config = config)
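A minimal config.yml that this runner would accept, written from Python so the keys mirror exactly what the code above reads; all values are placeholders.

import yaml

config = {
    'city_name': 'example_city',
    'country_name': 'example_country',
    'sample_size': '0.01',
    'target_path': 'output',
    'stages': ['matsim.run'],
    'disable_progress_bar': False,
}

with open('config.yml', 'w') as f:
    yaml.safe_dump(config, f)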
Example #25
                filename = database.getFileFullName(fileid)
                with open(filename, "r") as f:
                    return f.read()
            else:
                return get_render().notallowed()
        else:
            raise web.seeother('/')


#-------------------------------------------------------------------------------
class Job:
    def GET(self, jobid):
        if logged():
            if database.isJobFromUser(jobid, session.user):
                jobinfo = database.getJobInfo(jobid)
                return get_render().job(jobinfo)
            else:
                return get_render().notallowed()
        else:
            raise web.seeother('/')


#-------------------------------------------------------------------------------
if __name__ == "__main__":
    database.init()
    p = pipeline.run()
    app.run()
    p.terminate()

#-------------------------------------------------------------------------------
Example #26
def cfPipeline():
    """Cell free pipeline.
    """
    
    print(f"cfPipeline { __version__}", file=sys.stderr)
    
    parser = argparse.ArgumentParser()
    parser.add_argument('input_fastqs', nargs="+", help="Paths of input fastq or fastq.gz files. Order is important if paired end reads.")
    parser.add_argument("-r", "--reference", help="Path to reference genome or containing directory.", required=True)
    parser.add_argument("-n", "--name", help="Sample name used to name output files. Will be guessed from input fastq if not provided", default="")
    parser.add_argument("-p", "--panel", help="Path to covermi panel which must contain targets bedfile.", default="")
    parser.add_argument("-u", "--umi", help="UMI type (prism, thruplex_hv or thruplex) or empty strng if no umis.", default="")
    parser.add_argument("-v", "--vep", help="Path to vep datargs.", default="")
    parser.add_argument("-m", "--min-family-size", help="Minimum family size. Families smaller than this will be filtered", type=int, default=1)
    parser.add_argument("-f", "--min-vaf", help="Minimum variant allele frequency for a variant to be called when using VarDict.", type=float, default=None)
    parser.add_argument("-a", "--min-alt-reads", help="Minimum number of alt reads for a variant to be called.", type=float, default=2)
    parser.add_argument("-c", "--cnv", help="Whitespace separated list of target names, as specified in targets bedfile, over which to calculate copy number variation.", default="")
    parser.add_argument("-d", "--sizes", help="Whitespace separated list of reference names over which to calculate fragment size distribution.", default="")
    parser.add_argument("-b", "--translocations", help="Call translocations (supplementary reads aligned to different chromosomes).", action="store_const", const=True, default=False)
    parser.add_argument("-i", "--interleaved", help="Each input fastq contains alternating reads 1 and 2.", action="store_const", const=True, default=False)
    parser.add_argument("-o", "--output", help="Path to write output files to.", default=".")
    parser.add_argument("-t", "--threads", help="Number of threads to use, defaults to all available threads if not specified.", type=int, default=None)
    parser.add_argument("-s", "--sam-only", help="Quit after producing initial undeduplicated sam.", action="store_const", const=True, default=False)
    parser.add_argument("-C", "--callers", help="Variant callers to use. Valid values are varscan, vardict and mutect2. Defaults to 'varscan,vardict'.", default="varscan,vardict")
    parser.add_argument("-D", "--optical-duplicate-distance", help="Maximum pixel distance between two cluster to be considered optical duplicates.", default=None)
    args = parser.parse_args()

    threads = args.threads or run(["getconf", "_NPROCESSORS_ONLN"]).stdout.strip()

    if not args.name:
        args.name = guess_sample_name(args.input_fastqs)
        if not args.name:
            sys.exit("Ambiguous sample name")

    if " " in args.name:
        args.name = args.name.replace(" ", "_")

    if args.min_vaf is None:
        args.min_vaf = 0.01 if args.min_family_size == 1 else 0.001

    args.reference = os.path.abspath(args.reference)
    args.input_fastqs = [os.path.abspath(path) for path in args.input_fastqs]
    if args.panel:
        args.panel = os.path.abspath(args.panel)
    if args.vep:
        args.vep = os.path.abspath(args.vep)
    os.chdir(args.output)

    args.reference = (glob.glob(f"{args.reference}/*.fna") + glob.glob(f"{args.reference}/*.fa") + glob.glob(f"{args.reference}/*.fasta") + [args.reference])[0]
    ref_dir = os.path.dirname(args.reference)
    if glob.glob(f"{ref_dir}/*.sa"):
        bwa = "bwa"
    elif glob.glob(f"{ref_dir}/*.0123"):
        bwa = "bwa-mem2"
    else:
        sys.exit("Invalid bwa indexes")
    targets_bedfile = (glob.glob(f"{args.panel}/*.bed") + [None])[0] if args.panel else ""
    stats = f"{args.name}.stats.json"
    pipe = Pipe()


    # Remove umis and do some basic fastq qc
    interleaved_fastq = f"{args.name}.interleaved.fastq"
    command = ["udini", "--output", interleaved_fastq,
                        "--stats", stats,
                        "--umi", args.umi]
    if args.interleaved:
        command.append("--interleaved")
    pipe(command + args.input_fastqs)


    base_sam = f"{args.name}.base.sam"
    with open(base_sam, "wb") as f_out:
        pipe([bwa, "mem", "-t", threads, 
                          "-p", # interleaved paired end fastq
                          "-C", # Append fastq comment to sam
                          "-v", "1", # Output errors only 
                          args.reference, 
                          interleaved_fastq], stdout=f_out)
    os.unlink(interleaved_fastq)


    sorted_sam = f"{args.name}.sorted.sam"
    pipe(["samtools", "sort", "-o", sorted_sam,
                              "-@", threads,
                              base_sam])
    os.unlink(base_sam)

    if args.sam_only:
        return


    deduplicated_fastq = f"{args.name}.deduplicated.fastq"
    if args.optical_duplicate_distance is not None:
        optical_duplicate_distance = ["--optical-duplicate-distance", args.optical_duplicate_distance]
    else:
        optical_duplicate_distance = []
    pipe(["elduderino", "--output", deduplicated_fastq,
                        "--stats", stats,
                        "--min-family-size", args.min_family_size,
                        "--umi", args.umi] +
                        optical_duplicate_distance +
                        [sorted_sam])
    os.unlink(sorted_sam)


    deduplicated_sam = f"{args.name}.deduplicated.sam"
    with open(deduplicated_sam, "wb") as f_out:
        pipe([bwa, "mem", "-t", threads, 
                          "-p", # interleaved paired end fastq
                          "-C", # Append fastq comment to sam
                          "-Y", # Soft clip non-primary reads
                          "-v", "1", # Output errors only 
                          args.reference, 
                          deduplicated_fastq], stdout=f_out)
    os.unlink(deduplicated_fastq)


    namesorted_sam = f"{args.name}.namesorted.sam"
    pipe(["samtools", "sort", "-n", # sort by name
                              "-o", namesorted_sam,
                              "-@", threads,
                              deduplicated_sam])
    os.unlink(deduplicated_sam)


    pipe(["size", "--stats", stats,
                  "--rnames", args.sizes,
                  "--output", f"{args.name}.sizes.pdf",
                  namesorted_sam])


    ontarget_sam = f"{args.name}.ontarget.sam"
    pipe(["ontarget", "--output", ontarget_sam,
                      "--bed", targets_bedfile,
                      "--stats", stats,
                      "--cnv", args.cnv,
                      "--threads", threads,
                      namesorted_sam])
    os.unlink(namesorted_sam)
    
    
    untrimmed_sam = f"{args.name}.untrimmed.sam"
    pipe(["samtools", "sort", "-o", untrimmed_sam,
                              "-@", threads, 
                              ontarget_sam])
    os.unlink(ontarget_sam)
    
    
    trimmed_sam = f"{args.name}.trimmed.sam"
    pipe(["trim", "--output", trimmed_sam,
                  "--reference", args.reference,
                  untrimmed_sam])
    os.unlink(untrimmed_sam)
    
    
    namesorted_sam = f"{args.name}.namesorted.sam"
    pipe(["samtools", "sort", "-n", # sort by name
                              "-o", namesorted_sam,
                              "-@", threads, 
                              trimmed_sam])
    os.unlink(trimmed_sam)


    fixed_sam = f"{args.name}.fixed.sam"
    pipe(["samtools", "fixmate", namesorted_sam, fixed_sam])
    os.unlink(namesorted_sam)


    if args.translocations:
        pipe(["breakpoint", "--output", f"{args.name}.translocations.tsv",
                            fixed_sam])


    no_read_groups_bam = f"{args.name}.no_read_groups.bam"
    pipe(["samtools", "sort", "-o", no_read_groups_bam,
                              "-@", threads,
                              fixed_sam])
    os.unlink(fixed_sam)


    bam = f"{args.name}.bam"
    # This step is only required to satisfy Mutect2 and possibly other gatk tools
    pipe(["gatk", "AddOrReplaceReadGroups", f"I={no_read_groups_bam}", f"O={bam}", "LB=lb", "PL=ILLUMINA", "PU=pu", f"SM={args.name}"])
    os.unlink(no_read_groups_bam)


    pipe(["samtools", "index", bam])


    if args.panel:
        pipe(["covermi_stats", "--panel", args.panel,
                               "--output", f"{args.name}.covermi.pdf",
                               "--stats", stats,
                               bam])


    pipe(["call_variants", "--reference", args.reference,
                           "--callers", args.callers,
                           "--name", args.name,
                           "--panel", args.panel,
                           "--vep", args.vep,
                           "--min-vaf", args.min_vaf,
                           "--min-alt-reads", 2,
                           "--output", ".", # We have already changed directory into the current directory
                           "--threads", threads,
                           bam])


    #vaf_plot = f"{args.name}.vaf.pdf"
    pipe(["vcf_stats", f"{args.name}.vardict.vcf", # May need to change this depending on variant caller performance
                       "--stats", stats])
                       #"--output", vaf_plot])

    print(pipe.durations, file=sys.stderr, flush=True)
Example #27
def annotate_panel(vcf,
                   vep,
                   reference=None,
                   threads=None,
                   output="",
                   panel="",
                   buffer_size=None):
    if threads is None:
        threads = run(["getconf", "_NPROCESSORS_ONLN"]).stdout.strip()

    if not output:
        output = "."
    if os.path.isdir(output):
        output = os.path.join(
            output, "{}.annotation.tsv".format(
                vcf[:-4] if vcf.endswith(".vcf") else vcf))

    vepjson = "{}.vep.json".format(output[:-4])
    vep_options = [
        "--no_stats", "--dir", vep, "--format", "vcf", "--json", "--offline",
        "--everything", "--warning_file", "STDERR", "--force_overwrite"
    ]
    if reference is not None:
        reference = (glob.glob(f"{reference}/*.fna") +
                     glob.glob(f"{reference}/*.fa") +
                     glob.glob(f"{reference}/*.fasta") + [reference])[0]
        vep_options += ["--fasta", reference]
    if int(threads) > 1:
        vep_options += ["--fork", threads]
    if "refseq" in vep:
        vep_options += ["--refseq"]
    if buffer_size is not None:
        vep_options += ["--buffer_size", buffer_size]

    pipe(["vep", "-i", vcf, "-o", vepjson] + vep_options)

    get_read_data = None
    with open(vcf, "rt") as f:
        for row in f:
            if not row.startswith("#"):
                break
            if row.startswith("##source="):
                source = row[9:].strip()
                #if source == "strelka":
                if source.startswith("VarDict"):
                    get_read_data = vardict_read_data
                elif source == "VarScan2":
                    get_read_data = varscan2_read_data
                elif source == "Mutect2":
                    get_read_data = mutect2_read_data
            headings = row

    if get_read_data is None:
        sys.exit(f"Unsupported variant caller {source}")
    if len(headings.split("\t")) > 10:
        sys.exit("Multi-sample vcfs not suppored")

    targets = None
    principal = {}
    needed_genes = set()
    needed_transcripts = set()
    if panel:
        panel = Panel(panel)
        if "targets" in panel:
            targets = panel.targets

        if "names" in panel:
            for name in panel.names:
                name = name.split()
                needed_genes.add(name[0])
                if len(name) > 1:
                    needed_transcripts.add(name[1])
        if "principal" in panel.paths:
            principal = appris(panel.paths["principal"])

    if "refseq" in vep:

        def consequence_sort(cons):
            transcript, minor = cons["transcript_id"].split(".")
            prefix = transcript[:2]
            major = transcript[3:]
            return [
                transcript in needed_transcripts, cons["gene_symbol"]
                in needed_genes, BIOTYPE[cons["biotype"]], REFSEQ[prefix],
                -int(cons["gene_id"]),
                principal.get(transcript, 0), "canonical" in cons, -int(major),
                int(minor)
            ]

    else:  # ensembl transcripts

        def consequence_sort(cons):
            # Version numbers not in vep as of version 101, but who knows the future ...
            transcript = cons["transcript_id"]
            return [
                transcript in needed_transcripts, cons["gene_symbol"]
                in needed_genes, BIOTYPE[cons["biotype"]],
                -int(cons["gene_id"].translate(DELETE_NON_DIGIT)),
                principal.get(transcript, 0), "canonical" in cons,
                -int(transcript.translate(DELETE_NON_DIGIT))
            ]

    annotations = []
    with open(vepjson) as f:
        for line in f:
            vep_output = json.loads(line)

            consequences = vep_output.get("transcript_consequences")
            if consequences:
                cons = sorted(consequences, key=consequence_sort)[-1]
                other_genes = set(c["gene_symbol"]
                                  for c in consequences) - set(
                                      [cons["gene_symbol"]])

            else:
                most_severe_consequence = vep_output["most_severe_consequence"]
                for cons in sorted(chain(*[
                        v for k, v in vep_output.items()
                        if k.endswith("_consequences")
                ]),
                                   key=lambda x: x.get("biotype", ""),
                                   reverse=True):
                    # We are only going to use biotype and impact so probably does not matter which one we choose so long as we are consistent
                    if most_severe_consequence in cons["consequence_terms"]:
                        break
                other_genes = ()

            row = vep_output["input"].rstrip().split("\t")
            read_data = get_read_data(row)

            if read_data["alt_depth"] == "0":
                continue

            # https://gatk.broadinstitute.org/hc/en-us/articles/360035532152-Fisher-s-Exact-Test
            fisher_strand = -10 * math.log10(
                fisher_exact([read_data["ref_fr"], read_data["alt_fr"]])[1])

            demographics = parse_colocated(vep_output)

            annotations.append([
                cons.get("gene_symbol", ""),
                cons.get("transcript_id", ""), row[CHROM], vep_output["start"],
                vep_output["allele_string"], row[QUAL], row[FILTERS],
                read_data["vaf"], read_data["depth"], read_data["alt_depth"],
                "{}:{}".format(*read_data["alt_fr"]),
                "{}:{}".format(*read_data["ref_fr"]),
                "{:.1f}".format(fisher_strand),
                cons.get("hgvsc", ""),
                cons.get("hgvsp", ""),
                cons.get("biotype", ""),
                cons.get("impact",
                         ""), ", ".join(demographics.get("clin_sig", ())),
                ", ".join(cons.get("consequence_terms", ())),
                cons.get("sift_prediction", ""),
                cons.get("polyphen_prediction", ""), "{:.10f}".format(
                    demographics["maf"]) if "maf" in demographics else "",
                ", ".join(sorted(other_genes)),
                ", ".join(demographics.get("dbsnp", ())),
                ", ".join(demographics.get("hgmd", ())), ", ".join(
                    demographics.get("cosmic", ())), ", ".join(
                        demographics.get("pubmed", ()))
            ])

    os.unlink(vepjson)
    annotations.sort(key=lambda r: (chrom2int(r[2]), r[2], int(r[3]), r[4]))
    with open(output, "wt") as f:
        writer = csv.writer(f, delimiter="\t")
        writer.writerow([
            "Gene", "Transcript", "Chrom", "Pos", "Change", "Quality",
            "Filters", "VAF", "Depth", "Alt Depth", "Alt Depth F:R",
            "Ref Depth F:R", "FisherStrand", "HGVSc", "HGVSp", "Biotype",
            "Impact", "Clinical Significance (Pubmed)", "Consequences", "Sift",
            "Polyphen", "MAF", "Other Genes", "dbSNP", "HGMD", "COSMIC",
            "Pubmed"
        ])
        writer.writerows(annotations)
Example #28
    def _import_results_for_analysis(self):
        if not os.path.isfile(self._destination_fname):
            pipeline.run(self._address_list_fname,
                         self._postcode_reference_fname,
                         self._destination_fname)
        return pd.read_csv(self._destination_fname, sep='\t')
Example #29
                     input_folder=input_path,
                     refine=False,
                     refiner_model=weights_path+'craft_refiner_CTW1500.pth')
    
    """ For test images in a folder """
    image_list, _, _ = file_utils.get_files(args.input_folder)
    image_names = []
    
    for num in range(len(image_list)):
        image_names.append(os.path.relpath(image_list[num], args.input_folder))

    # create result folder if it does not exist
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)
    
    args.image_list=image_list
    args.image_names=image_names
    
    # load craft model
    craft_net = pipeline.load_craft(args)
    
    # load refiner
    if args.refine:
        refine_net = pipeline.load_refiner(args)
        refine_net.eval()
        args.poly = True
    else:
        refine_net = None
    
    pipeline.run(args, craft_net, refine_net)
    crop_image.run()
Example #30
#### TRAIN and TEST DATA WITHOUT BPE
# move the directory of your models after running
hidden_size = 256
dropout_p = 0.1
learning_rate = 0.01
n_epochs = 20
max_hours = 23
clip = 8
use_bpe = False
replace_unknown_words = True
padding = True
MAX_LENGTH = 25
plot_every = 200
print_every = 28319  # every epoch

encoder, attn_decoder, slang, tlang, plot_losses, max_bpe_length = pipeline.run(
    fp.spath_train,
    fp.tpath_train,
    fp.spath_test,
    fp.tpath_test,
    train_model_pos,
    predict_allPOS,
    max_sentence_length=MAX_LENGTH,
    replace_unknown_words=True,
    use_bpe=False,
    num_operations=100,
    vocab_threshold=1,
    padding=False,
    model_name='pos_no_bpe')
Example #31
#!/usr/bin/env python

import pipeline
import colorsys, hashlib, random

def calibration(tick, pixel):
        if pixel.strip_offset == pipeline.strip_length - 1:
                base_color = (1, 1, 1)
        elif pixel.strip_offset == 0:
                base_color = colorsys.hsv_to_rgb(pixel.strip / float(pipeline.strips), 0.25, 1)
        elif pixel.strip_offset % 10 == 0:
                base_color = colorsys.hsv_to_rgb(pixel.strip / float(pipeline.strips), 1, 1)
        else:
                base_color = colorsys.hsv_to_rgb(pixel.strip / float(pipeline.strips), 0.5, 1)
        pixel.color = base_color

ourPipeline = [pipeline.Fragment(calibration)]

if __name__ == '__main__':
        pipeline.run(pipeline.pixels, ourPipeline, 20)
Example #32
#### RNN: TRAIN and Validation DATA using BPE

hidden_size = 256
dropout_p = 0.1
learning_rate = 0.01
n_epochs = 20
max_hours = 23
clip = 8
use_bpe = True
replace_unknown_words = True
padding = False
MAX_LENGTH = 25
plot_every = 200
print_every = 28319  # every epoch

(encoder, attn_decoder, slang, tlang, plot_losses,
 max_bpe_length) = pipeline.run(fp.spath_train,
                                fp.tpath_train,
                                fp.spath_test,
                                fp.tpath_test,
                                train_model,
                                predict_all,
                                max_sentence_length=MAX_LENGTH,
                                replace_unknown_words=True,
                                use_bpe=True,
                                num_operations=400,
                                vocab_threshold=5,
                                padding=False,
                                model_name='rnn_bpe')
Example #33
def rossiPipeline():
    """Cell free pipeline.
    """

    print(f"rossiPipeline {__version__}", file=sys.stderr)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_fastqs',
        nargs="+",
        help=
        "Paths of input fastq or fastq.gz files. Order is important if paired end reads."
    )
    parser.add_argument(
        "-r",
        "--reference",
        help="Path to reference genome or containing directory.",
        required=True)
    parser.add_argument(
        "-n",
        "--name",
        help=
        "Sample name used to name output files. Will be guessed from input fastq if not provided",
        default="")
    parser.add_argument(
        "-p",
        "--panel",
        help="Path to covermi panel which must contain targets bedfile.",
        default="")
    parser.add_argument(
        "-u",
        "--umi",
        help=
        "UMI type (prism, thruplex_hv or thruplex) or empty strng if no umis.",
        default="")
    parser.add_argument("-v", "--vep", help="Path to vep datargs.", default="")
    parser.add_argument(
        "-f",
        "--min-vaf",
        help=
        "Minimum variant allele frequency for a variant to be called when using VarDict.",
        type=float,
        default=None)
    parser.add_argument(
        "-a",
        "--min-alt-reads",
        help="Minimum number of alt reads for a variant to be called.",
        type=float,
        default=2)
    parser.add_argument(
        "-c",
        "--cnv",
        help=
        "Whitespace separated list of target names, as specified in targets bedfile, over which to calculate copy number variation.",
        default="")
    parser.add_argument(
        "-d",
        "--sizes",
        help=
        "Whitespace separated list of reference names over which to calculate fragment size distribution.",
        default="")
    parser.add_argument(
        "-b",
        "--translocations",
        help=
        "Call translocations (supplementary reads aligned to different chromosomes).",
        action="store_const",
        const=True,
        default=False)
    parser.add_argument("-o",
                        "--output",
                        help="Path to write output files to.",
                        default=".")
    parser.add_argument(
        "-t",
        "--threads",
        help=
        "Number of threads to use, defaults to all available threads if not specified.",
        type=int,
        default=None)
    parser.add_argument(
        "-C",
        "--callers",
        help=
        "Variant callers to use. Valid values are varscan, vardict and mutect2. Defaults to 'varscan,vardict'.",
        default="varscan,vardict")
    args = parser.parse_args()

    threads = args.threads or run(["getconf", "_NPROCESSORS_ONLN"
                                   ]).stdout.strip()

    if not args.name:
        args.name = guess_sample_name(args.input_fastqs)
        if not args.name:
            sys.exit("Ambiguous sample name")

    if " " in args.name:
        args.name = args.name.replace(" ", "_")

    if args.min_vaf is None:
        args.min_vaf = 0.01 if args.min_family_size == 1 else 0.001

    args.reference = os.path.abspath(args.reference)
    args.input_fastqs = [os.path.abspath(path) for path in args.input_fastqs]
    if args.panel:
        args.panel = os.path.abspath(args.panel)
    if args.vep:
        args.vep = os.path.abspath(args.vep)
    os.chdir(args.output)

    args.reference = (glob.glob(f"{args.reference}/*.fna") +
                      glob.glob(f"{args.reference}/*.fa") +
                      glob.glob(f"{args.reference}/*.fasta") +
                      [args.reference])[0]
    ref_dir = os.path.dirname(args.reference)
    if glob.glob(f"{ref_dir}/*.sa"):
        bwa = "bwa"
    elif glob.glob(f"{ref_dir}/*.0123"):
        bwa = "bwa-mem2"
    else:
        sys.exit("Invalid bwa indexes")
    targets_bedfile = (glob.glob(f"{args.panel}/*.bed") +
                       [None])[0] if args.panel else ""
    stats = f"{args.name}.stats.json"
    pipe = Pipe()

    # FastUniq requires ungzipped fastqs
    ungzipped_fastqs = []
    temp_fastqs = []
    for fastq in args.input_fastqs:
        if fastq.endswith(".gz"):
            run(["gunzip", "-k", fastq])
            fastq = fastq[:-3]
            temp_fastqs.append(fastq)
        ungzipped_fastqs.append(fastq)

    if len(ungzipped_fastqs) > 2:
        with open(f"{args.name}_R1.fastq", "wb") as f_out:
            pipe(["cat"] + ungzipped_fastqs[::2], stdout=f_out)
        with open(f"{args.name}_R2.fastq", "wb") as f_out:
            pipe(["cat"] + ungzipped_fastqs[1::2], stdout=f_out)
        ungzipped_fastqs = [f"{args.name}_r1.fastq", f"{args.name}_r2.fastq"]
        for fastq in temp_fastqs:
            os.unlink(fastq)
        temp_fastqs = list(ungzipped_fastqs)

    fastq_names = f"{args.name}.fastqs.txt"
    with open(fastq_names, "wt") as f_out:
        f_out.write("{}\n{}\n".format(*ungzipped_fastqs))

    deduplicated_fastqs = [
        f"{args.name}_R1.deduplicated.fastq",
        f"{args.name}_R2.deduplicated.fastq"
    ]
    pipe([
        "fastuniq", "-i", fastq_names, "-o", deduplicated_fastqs[0], "-p",
        deduplicated_fastqs[1]
    ])
    os.unlink(fastq_names)
    for fastq in temp_fastqs:
        os.unlink(fastq)

    # Remove umis and do some basic fastq qc
    interleaved_fastq = f"{args.name}.interleaved.fastq"
    command = [
        "udini", "--output", interleaved_fastq, "--stats", stats, "--umi",
        args.umi
    ]
    pipe(command + deduplicated_fastqs)
    for fastq in deduplicated_fastqs:
        os.unlink(fastq)

    base_sam = f"{args.name}.base.sam"
    with open(base_sam, "wb") as f_out:
        pipe(
            [
                bwa,
                "mem",
                "-t",
                threads,
                "-p",  # interleaved paired end fastq
                "-C",  # Append fastq comment to sam
                "-v",
                "1",  # Output errors only 
                args.reference,
                interleaved_fastq
            ],
            stdout=f_out)
    os.unlink(interleaved_fastq)

    namesorted_sam = f"{args.name}.namesorted.sam"
    pipe([
        "samtools",
        "sort",
        "-n",  # sort by name
        "-o",
        namesorted_sam,
        "-@",
        threads,
        base_sam
    ])
    os.unlink(base_sam)

    pipe([
        "size", "--stats", stats, "--rnames", args.sizes, "--output",
        f"{args.name}.sizes.pdf", namesorted_sam
    ])

    ontarget_sam = f"{args.name}.ontarget.sam"
    pipe([
        "ontarget", "--output", ontarget_sam, "--bed", targets_bedfile,
        "--stats", stats, "--cnv", args.cnv, "--threads", threads,
        namesorted_sam
    ])
    os.unlink(namesorted_sam)

    # This is likely not necessary
    namesorted_sam = f"{args.name}.namesorted.sam"
    pipe([
        "samtools",
        "sort",
        "-n",  # sort by name
        "-o",
        namesorted_sam,
        "-@",
        threads,
        ontarget_sam
    ])
    os.unlink(ontarget_sam)

    fixed_sam = f"{args.name}.fixed.sam"
    pipe(["samtools", "fixmate", namesorted_sam, fixed_sam])
    os.unlink(namesorted_sam)

    if args.translocations:
        pipe([
            "breakpoint", "--output", f"{args.name}.translocations.tsv",
            fixed_sam
        ])

    no_read_groups_bam = f"{args.name}.no_read_groups.bam"
    pipe([
        "samtools", "sort", "-o", no_read_groups_bam, "-@", threads, fixed_sam
    ])
    os.unlink(fixed_sam)

    bam = f"{args.name}.bam"
    # This step is only required to satisfy Mutect2 and possibly other gatk tools
    pipe([
        "gatk", "AddOrReplaceReadGroups", f"I={no_read_groups_bam}",
        f"O={bam}", "LB=lb", "PL=ILLUMINA", "PU=pu", f"SM={args.name}"
    ])
    os.unlink(no_read_groups_bam)

    pipe(["samtools", "index", bam])

    if args.panel:
        pipe([
            "covermi_stats", "--panel", args.panel, "--output",
            f"{args.name}.covermi.pdf", "--stats", stats, bam
        ])

    pipe([
        "call_variants",
        "--reference",
        args.reference,
        "--callers",
        args.callers,
        "--name",
        args.name,
        "--panel",
        args.panel,
        "--vep",
        args.vep,
        "--min-vaf",
        args.min_vaf,
        "--min-alt-reads",
        args.min_family_size,
        "--output",
        ".",  # We have already changed directory into the current directory
        "--threads",
        threads,
        bam
    ])

    #vaf_plot = f"{args.name}.vaf.pdf"
    pipe([
        "vcf_stats",
        f"{args.name}.vardict.vcf",  # May need to change this depending on variant caller performance
        "--stats",
        stats
    ])
    #"--output", vaf_plot])

    print(pipe.durations, file=sys.stderr, flush=True)
Example #34
def call_variants():
    """Cell free pipeline2 variant calling.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('input_bam', help="Path of the input bam file.")
    parser.add_argument(
        "-r",
        "--reference",
        help="Path to reference genome or containing directory.",
        required=True)
    parser.add_argument(
        "-C",
        "--callers",
        help=
        "Variant callers to use. Valid values are varscan, vardict and mutect2. Defaults to 'varscan,vardict'.",
        default="varscan,vardict")
    parser.add_argument(
        "-n",
        "--name",
        help=
        "Sample name used to name output files. Will be guessed from input bam if not provided",
        default="")
    parser.add_argument(
        "-p",
        "--panel",
        help=
        "Path to covermi panel which must contain targets bedfile. Required for annotation.",
        default="")
    parser.add_argument("-v",
                        "--vep",
                        help="Path to vep cache. Required for annotation.",
                        default="")
    parser.add_argument(
        "-f",
        "--min-vaf",
        help="Minimum variant allele frequency for a variant to be called.",
        type=float,
        default=0)
    parser.add_argument(
        "-a",
        "--min-alt-reads",
        help="Minimum number of alt reads for a variant to be called.",
        type=int,
        default=2)
    parser.add_argument("-o",
                        "--output",
                        help="Path to write output files to.",
                        default=".")
    parser.add_argument(
        "-t",
        "--threads",
        help=
        "Number of threads to use, defaults to all available threads if not specified.",
        type=int,
        default=None)
    args = parser.parse_args()

    threads = args.threads or run(["getconf", "_NPROCESSORS_ONLN"
                                   ]).stdout.strip()

    if not args.name:
        fn = os.path.basename(args.input_bam)
        args.name = fn[:-4] if fn.endswith(".bam") else fn

    args.callers = args.callers.lower().replace(",", " ").split()
    for caller in args.callers:
        if caller not in ("varscan", "vardict", "mutect2"):
            sys.exit(f"{caller} is not a recognised variant caller")

    args.reference = os.path.abspath(args.reference)
    args.input_bam = os.path.abspath(args.input_bam)
    if args.panel:
        args.panel = os.path.abspath(args.panel)
    if args.vep:
        args.vep = os.path.abspath(args.vep)
    os.chdir(args.output)

    args.reference = (glob.glob(f"{args.reference}/*.fna") +
                      glob.glob(f"{args.reference}/*.fa") +
                      glob.glob(f"{args.reference}/*.fasta") +
                      [args.reference])[0]
    pipe = Pipe()

    targets_bedfile = glob.glob(f"{args.panel}/*.bed") if args.panel else []
    targets_bedfile = targets_bedfile[0] if len(targets_bedfile) == 1 else ""

    if "vardict" in args.callers and not targets_bedfile:
        sys.exit(f"No targets bedfile found (required by vardict)")
    if "mutect2" in args.callers and not os.path.exists(
            f"{args.input_bam}.bai"):
        sys.exit(f"No index found for {args.input_bam} (required by mutect2)")

    ###############################################################################################################
    ### VARSCAN                                                                                                 ###
    ###############################################################################################################
    if "varscan" in args.callers:
        mpileup = f"{args.name}.mpileup"
        pipe([
            "samtools", "mpileup", "-o", mpileup, "-f", args.reference, "-A",
            "-B", "-q", "10", "-d", "10000000", args.input_bam
        ])

        pvalue_vcf = f"{args.name}.pvalue.vcf"
        with open(pvalue_vcf, "wb") as f_out:
            pipe([
                "varscan", "mpileup2cns", mpileup, "--variants",
                "--output-vcf", "1", "--min-coverage", "1", "--min-var-freq",
                args.min_vaf, "--min-avg-qual", "20", "--min-reads2",
                args.min_alt_reads, "--p-value", "0.05", "--strand-filter", "1"
            ],
                 stdout=f_out)
        os.unlink(mpileup)

        vcf = f"{args.name}.varscan.unfiltered.vcf" if targets_bedfile else f"{args.name}.varscan.vcf"
        pipe(["postprocess_varscan_vcf", pvalue_vcf, "--output", vcf])
        os.unlink(pvalue_vcf)

        if targets_bedfile:
            unfiltered_vcf = vcf
            vcf = f"{args.name}.varscan.vcf"
            pipe([
                "filter_vcf", unfiltered_vcf, "--output", vcf, "--bed",
                targets_bedfile
            ])
            os.unlink(unfiltered_vcf)

        if args.vep and args.panel:
            pipe([
                "annotate_panel", "--vep", args.vep, "--output",
                f"{args.name}.varscan.annotation.tsv", "--reference",
                args.reference, "--threads", threads, "--panel", args.panel,
                vcf
            ])

    ###############################################################################################################
    ### VARDICT                                                                                                 ###
    ###############################################################################################################
    if "vardict" in args.callers:
        vardict_table = f"{args.name}.vardict.tsv"
        with open(vardict_table, "wb") as f_out:
            pipe(
                [
                    "vardictjava",
                    "-K",  # include Ns in depth calculation
                    "-deldupvar",  # variants are only called if start position is inside the region interest
                    "-G",
                    args.reference,
                    "-N",
                    args.name,
                    "-b",
                    args.input_bam,
                    "-Q",
                    "10",
                    "-f",
                    args.min_vaf,
                    "-r",
                    args.min_alt_reads,
                    "-th",
                    threads,
                    "-u",  # count mate pair overlap only once
                    "-fisher",  # perform work of teststrandbias.R
                    targets_bedfile
                ],
                stdout=f_out)

        unfiltered_vcf = f"{args.name}.vardict.unfiltered.vcf"
        with open(vardict_table, "rb") as f_in:
            with open(unfiltered_vcf, "wb") as f_out:
                pipe(
                    [
                        "var2vcf_valid.pl",
                        "-A",  # output all variants at same position
                        "-f",
                        args.min_vaf,
                        "-N",
                        args.name
                    ],
                    stdin=f_in,
                    stdout=f_out)
        os.unlink(vardict_table)

        vcf = f"{args.name}.vardict.vcf"
        # Although vardict takes the targets bedfile as an argument it does call occasional variants just outside
        pipe([
            "filter_vcf", unfiltered_vcf, "--output", vcf, "--bed",
            targets_bedfile
        ])
        os.unlink(unfiltered_vcf)

        if args.vep and args.panel:
            pipe([
                "annotate_panel", "--vep", args.vep, "--output",
                f"{args.name}.vardict.annotation.tsv", "--reference",
                args.reference, "--threads", threads, "--panel", args.panel,
                vcf
            ])

    ###############################################################################################################
    ### MUTECT2                                                                                                 ###
    ###############################################################################################################
    if "mutect2" in args.callers:
        unmutectfiltered_vcf = f"{args.name}.unmutectfiltered.mutect2.vcf"
        pipe([
            "gatk", "Mutect2", "-R", args.reference, "-I", args.input_bam,
            "-O", unmutectfiltered_vcf, "--create-output-variant-index",
            "false", "--max-reads-per-alignment-start", "0",
            "--disable-read-filter", "NotDuplicateReadFilter",
            "--disable-read-filter", "GoodCigarReadFilter"
        ])

        multiallelic_vcf = f"{args.name}.multiallelic.mutect2.vcf"
        pipe([
            "gatk", "FilterMutectCalls", "-R", args.reference, "-V",
            unmutectfiltered_vcf, "-O", multiallelic_vcf, "--filtering-stats",
            "false", "--create-output-variant-index", "false"
        ])
        os.unlink(unmutectfiltered_vcf)
        os.unlink(f"{unmutectfiltered_vcf}.stats")

        vcf = f"{args.name}.mutect2.unfiltered.vcf" if targets_bedfile else f"{args.name}.mutect2.vcf"
        pipe([
            "postprocess_mutect2_vcf", "--output", vcf, "--min-alt-reads",
            args.min_alt_reads, "--min-vaf", args.min_vaf, multiallelic_vcf
        ])
        os.unlink(multiallelic_vcf)

        if targets_bedfile:
            unfiltered_vcf = vcf
            vcf = f"{args.name}.mutect2.vcf"
            pipe([
                "filter_vcf", unfiltered_vcf, "--output", vcf, "--bed",
                targets_bedfile
            ])
            os.unlink(unfiltered_vcf)

        if args.vep and args.panel:
            pipe([
                "annotate_panel", "--vep", args.vep, "--output",
                f"{args.name}.mutect2.annotation.tsv", "--reference",
                args.reference, "--threads", threads, "--panel", args.panel,
                vcf
            ])

    print(pipe.durations, file=sys.stderr, flush=True)
Example #35
def convert_button(fname) -> str:
    result = run(fname)
    return result
Example #36
File: main.py Project: Lax/Sloth
    def process(self, f):
        try:
            pipeline.run(f)
        except:
            logging.exception('put failed')
Example #37
#print testcasePath

# if not args.just:
#     args.just = False

#import sys
#sys.exit(1)

# preprocess
if args.run_pre:
    if args.output_only: print('only output traced')
    pipeline_preprocessing.preprocess_pipeline_data(datadir, testcasePath,
                                                    args.output_only,
                                                    args.funcname, jsonPath,
                                                    args.just)

if args.run_pipeline or args.run_old:

    if args.run_pipeline:
        if args.json:
            outputPath = jsonPath
        else:
            outputPath = path.join(args.basedir, 'output')
        if args.grader_path:
            pipeline.set_grader(args.grader_path)
        pipeline.run(datadir, outputPath, args.distances)
    else:
        # The old, original pipeline. Here there be dragons.
        outputPath = path.join(args.basedir, 'output_old')
        pipeline_old.run(datadir, outputPath)
Example #38
                self.strip = random.randint(0, pipeline.strips - 1)
                self.color = colorsys.hsv_to_rgb(random.random(), .4, 4)
                self.strength = 0
                self.delay = random.randint(61, 500)

        def pre_frame(self, tick, pixels):
                self.delay -= 1
                if self.delay < 0:
                        self.reset()
                if self.delay >= 60:
                        self.strength = 0
                        return

                fade_progress = self.delay
                fade_distance = fade_progress / 59.0
                self.strength = fade_distance

        def fragment(self, tick, pixel):
                if pixel.strip != self.strip:
                        return
                def compute_color(i):
                        flash_part = self.color[i] * self.strength
                        base_part = pixel.color[i] * (1 - self.strength)
                        return flash_part + base_part
                pixel.color = tuple((compute_color(c) for c in xrange(3)))

ourPipeline = [pipeline.Fragment(sky)] + [Raindrop() for x in xrange(50)] + [Lightning()]

if __name__ == '__main__':
        pipeline.run(pipeline.pixels, ourPipeline, .03)
Example #39
#import sys
#sys.exit(1)

# preprocess
if args.run_pre:
    if args.output_only: print('only output traced')
    pipeline_preprocessing.preprocess_pipeline_data(
        datadir,
        testcasePath,
        args.output_only,
        args.funcname,
        jsonPath,
        args.just
    )

if args.run_pipeline or args.run_old:

    if args.run_pipeline:
        if args.json:
            outputPath = jsonPath
        else:
            outputPath = path.join(args.basedir, 'output')
        if args.grader_path:
            pipeline.set_grader(args.grader_path)
        pipeline.run(datadir, outputPath, args.distances)
    else:
        # The old, original pipeline. Here there be dragons.
        outputPath = path.join(args.basedir, 'output_old')
        pipeline_old.run(datadir, outputPath)