Example #1
def main():
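    # Optional CLI toggle: argv[1] may be "True" or "False"; ast.literal_eval
    # parses it into a bool, and debug defaults to True when no flag is given.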
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    interaction_file = str(Path("Papers/1-s2.0-S009286741300439X-mmc1.txt"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)


    organisms = ["Human"]
    for organism in organisms:
        JsonLog.set_filename(
            utils.filename_date_append(Path(log_dir) / Path("Mapping_the_Human_miRNA_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json('paper',
                            "Mapping the Human miRNA Interactome by CLASH Reveals Frequent Noncanonical Binding")
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json('paper_url', "https://www.sciencedirect.com/science/article/pii/S009286741300439X")
        p = Pipeline(paper_name="Mapping_the_Human_miRNA",
                     organism=organism,
                     in_df=df_prepare(read_paper_data(interaction_file, debug)),
                     tmp_dir=tmp_dir)

        p.run()
    def test_classification_total(self):
        input_list = [
            ['1979.486.1', 'FALSE', 'FALSE', '1', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
             '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Metal'],
            ['1980.2d64.5', 'FALSE', 'FALSE', '2', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
             '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Furniture'],
            ['67.265', 'FALSE', 'FALSE', '3', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
             '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Metal'],
            ['67.265.10', 'FALSE', 'FALSE', '4', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
             '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Gold']
        ]

        output_dict = {}
        for row in input_list:
            Pipeline.running_total(row, output_dict)

        self.assertEqual(2, output_dict['Metal'])
        self.assertEqual(1, output_dict['Furniture'])
        self.assertEqual(1, output_dict['Gold'])
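
        # A minimal sketch (hypothetical, not the project's actual code) of the
        # contract these assertions imply:
        #
        #     @staticmethod
        #     def running_total(row, totals):
        #         totals[row[-1]] = totals.get(row[-1], 0) + 1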



    def select_table(self):

        DBUtil.select_table("classification_totals")
Example #3
def menu():

    ini_path = os.path.dirname(os.path.realpath(__file__))
    ini_path = os.path.join(ini_path, 'input.ini')

    Ini = ReadIni(ini_path)
    path = Ini.project_path
    start = Ini.start
    end = Ini.end
    test_begin(end, start)

    now = datetime.now()
    now = now.strftime("%b %d %Y %H:%M:%S")
    mkdir(path)
    rec = 'Project begins.'
    rec += '\n' + '***'*25
    rename_file(path, 'record')
    record(path, rec, init=True)
    print('***'*25)
    print(now)
    print(rec)
    try:
        shutil.copy(ini_path, path + '/input.ini')
    except Exception as e:
        print(e)

    Pipeline.pipeline(path, start, end)
Example #4
    def get_tree(self):
        from AnalysedTreeTransforms import AutoTestDictTransform
        # The AutoTestDictTransform creates the statement "__test__ = {}",
        # which when copied into the main ModuleNode overwrites
        # any __test__ in user code; not desired
        excludes = [AutoTestDictTransform]

        import Pipeline, ParseTreeTransforms
        context = CythonUtilityCodeContext(self.name)
        context.prefix = self.prefix
        #context = StringParseContext(self.name)
        tree = parse_from_strings(self.name, self.pyx, context=context)
        pipeline = Pipeline.create_pipeline(context,
                                            'pyx',
                                            exclude_classes=excludes)

        transform = ParseTreeTransforms.CnameDirectivesTransform(context)
        # InterpretCompilerDirectives already does a cdef declarator check
        #before = ParseTreeTransforms.DecoratorTransform
        before = ParseTreeTransforms.InterpretCompilerDirectives
        pipeline = Pipeline.insert_into_pipeline(pipeline,
                                                 transform,
                                                 before=before)

        (err, tree) = Pipeline.run_pipeline(pipeline, tree)
        assert not err, err
        return tree
def filterAlignments(infile, outfile):
    '''
    filter alignments to retain only those that
    have > 99% identity to the reference
    '''
    to_cluster = True
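    # CGAT pipeline convention: P.run() builds the shell command by interpolating
    # %(infile)s and %(outfile)s in `statement` from the caller's local namespace.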
    statement = '''delta-filter -q -i 99 %(infile)s > %(outfile)s'''
    P.run()
Example #6
def downloadSCOP( infile, outfile ):
    '''download the latest scop sequence set (< 40% identical)'''
    
    statement = '''
    wget -O %(outfile)s "http://astral.berkeley.edu/seq.cgi?get=scopdom-seqres-gd-sel-gs-bib;ver=1.75;item=seqs;cut=40"
    '''
    
    P.run()
def buildAlignmentCoordinates(infile, outfile):
    '''
    build coordinates file from alignment delta
    file
    '''
    to_cluster = True
    statement = '''show-coords -T -r %(infile)s > %(outfile)s'''
    P.run()
Example #10
    def process_pxd(self, source_desc, scope, module_name):
        import Pipeline
        if isinstance(source_desc, FileSourceDescriptor) and source_desc._file_type == 'pyx':
            source = CompilationSource(source_desc, module_name, os.getcwd())
            result_sink = create_default_resultobj(source, self.options)
            pipeline = Pipeline.create_pyx_as_pxd_pipeline(self, result_sink)
            result = Pipeline.run_pipeline(pipeline, source)
        else:
            pipeline = Pipeline.create_pxd_pipeline(self, scope, module_name)
            result = Pipeline.run_pipeline(pipeline, source_desc)
        return result
Example #12
def main():
    # start socket
    TCP_IP = '128.237.198.49'
    TCP_PORT = 2002
    print('Socket Information: %s:%d' % (TCP_IP, TCP_PORT))
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((TCP_IP, TCP_PORT))
    time.sleep(1e-3)


    # start camera
    vs = VideoStream(src=0).start()
    time.sleep(2.0)
    
    # calibration and find block
    #Caliberate_camera(vs)
    block_pixel_position = detect_block_grab(vs)
    block_real_position = transfer_to_real(block_pixel_position)
    print(block_pixel_position)
    print(block_real_position)

    # Inverse Kinematics
    inverse_kinematics(block_real_position, s)


    HOME_POSITION = [20, -15, 20]
    roll = -math.pi/2
    traj = RPC.pipline_position_encoder_roll(HOME_POSITION, [40, -30, 20], roll, s)
    traj = RPC.pipline_position_encoder_roll([40, -30, 20], [40, -30, 12], roll, s)
    Pipeline.C_execute([traj])


    the_block = [42, -26, 1, -1.04]
    ball_position = detect_ball(vs)
    ball_position.reverse()
    K = 0.4
    adj = 0
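    # Proportional correction: for the first tracked position with y above -16,
    # any x beyond 47 contributes err * K to the adjustment.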
    for pos in ball_position:
        if pos is None:
            continue
        print(pos)
        if pos[1] > -16:
            if pos[0] > 47:
                err = pos[0]-47
                adj = err*K
            break
    print(adj)        
    #print(ball_position)
    commands = Pipeline.Adjust(the_block, adj, s)
    for command in commands:
        print(command)
    Pipeline.C_execute(commands)
def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    mouse_config = {
        "organism": "Mouse",
        "interaction_file": "Papers/ncomms9864-s2.xlsx"
    }
    human_config = {
        "organism": "Human",
        "interaction_file": "Papers/ncomms9864-s4.xlsx"
    }

    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    log_dir = "Datafiles_Prepare/Logs/"

    for cnfg in [mouse_config, human_config]:
        organism = cnfg["organism"]
        interaction_file = cnfg["interaction_file"]

        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) /
                Path("Darnell_miRNA_target_chimeras_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json(
            'paper',
            "miRNA–target chimeras reveal miRNA 3-end pairing as a major determinant of Argonaute target specificity"
        )
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json('paper_url',
                            "https://www.nature.com/articles/ncomms9864")

        org = Darnell_miRNA_target_chimeras(interaction_file,
                                            tmp_dir,
                                            organism,
                                            debug=debug)
        org.run()

        print("Pipeline start")
        p = Pipeline(paper_name="Darnell_miRNA_target_chimeras",
                     organism=organism,
                     in_df=org.prepare_for_pipeline(),
                     tmp_dir=tmp_dir)

        p.run()
Example #14
    def __init__(self, audiofile, strings=None, filename=None):
        self.filename = filename
        self.audiofile = audiofile
        self.touched = True

        if not strings:
            strings = [-5, -10, -14, -19, -24, -29]
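
        # Assumption from the names: AppSinkPipeline pulls decoded samples from
        # the audio file for analysis, while Pipeline handles playback; both are
        # provided by the project's Pipeline module.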

        self.appsinkpipeline = Pipeline.AppSinkPipeline(self.audiofile)
        self.pipeline = Pipeline.Pipeline(self.audiofile)
        self.timeline = Timeline.Timeline(self, strings)
        self.timeline.show_all()
        self.control = VisualizerControl(self.pipeline)
Example #15
    def get_tree(self, entries_only=False, cython_scope=None):
        from AnalysedTreeTransforms import AutoTestDictTransform
        # The AutoTestDictTransform creates the statement "__test__ = {}",
        # which when copied into the main ModuleNode overwrites
        # any __test__ in user code; not desired
        excludes = [AutoTestDictTransform]

        import Pipeline, ParseTreeTransforms
        context = CythonUtilityCodeContext(self.name)
        context.prefix = self.prefix
        context.cython_scope = cython_scope
        #context = StringParseContext(self.name)
        tree = parse_from_strings(self.name,
                                  self.impl,
                                  context=context,
                                  allow_struct_enum_decorator=True)
        pipeline = Pipeline.create_pipeline(context,
                                            'pyx',
                                            exclude_classes=excludes)

        if entries_only:
            p = []
            for t in pipeline:
                p.append(t)
                if isinstance(
                        t, ParseTreeTransforms.AnalyseDeclarationsTransform):
                    break

            pipeline = p

        transform = ParseTreeTransforms.CnameDirectivesTransform(context)
        # InterpretCompilerDirectives already does a cdef declarator check
        #before = ParseTreeTransforms.DecoratorTransform
        before = ParseTreeTransforms.InterpretCompilerDirectives
        pipeline = Pipeline.insert_into_pipeline(pipeline,
                                                 transform,
                                                 before=before)

        if self.from_scope:

            def scope_transform(module_node):
                module_node.scope.merge_in(self.from_scope)
                return module_node

            transform = ParseTreeTransforms.AnalyseDeclarationsTransform
            pipeline = Pipeline.insert_into_pipeline(pipeline,
                                                     scope_transform,
                                                     before=transform)

        (err, tree) = Pipeline.run_pipeline(pipeline, tree, printtree=False)
        assert not err, err
        return tree
def calculateFalsePositiveRate(infiles, outfile):
    '''
    calculate the false positive rate in taxonomic
    abundances
    '''

    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()

    true_file = infiles[0]
    true_set = set()
    estimate_set = set()
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[
                len("metaphlan_"):] == os.path.basename(true_file):
            tablenames = [
                P.toTable(os.path.basename(true_file)),
                P.toTable(os.path.basename(estimate_file))
            ]

            for species in cc.execute("""SELECT species_name FROM %s""" %
                                      tablenames[0]).fetchall():
                true_set.add(species[0])
            for species in cc.execute(
                    """SELECT taxon FROM %s WHERE taxon_level == 'species'""" %
                    tablenames[1]).fetchall():
                if species[0].find("_unclassified") != -1: continue
                estimate_set.add(species[0])

    total_estimate = len(estimate_set)
    total_true = len(true_set)

    E.info("counting false positives and false negatives")
    print(estimate_set.difference(true_set))
    nfp = len(estimate_set.difference(true_set))
    nfn = len(true_set.difference(estimate_set))
    ntp = len(estimate_set.intersection(true_set))

    E.info("writing results")
    track = P.snip(os.path.basename(true_file), ".load")
    outf = open(outfile, "w")
    outf.write("track\ttp_rate\tfp_rate\tfn_rate\n")
    outf.write("\t".join(
        map(str, [
            track,
            float(ntp) / total_estimate,
            float(nfp) / total_estimate,
            float(nfn) / total_true
        ])) + "\n")
    outf.close()
def createAlignmentBedFiles(infile, outfile):
    '''
    create bed files - the intervals are with respect to the 
    reference genome
    intervals are merged to form a non redundant alignment set
    '''
    # has to be output from show coords in tab format
    # also have to be sorted for mergeBed
    to_cluster = True
    statement = '''cat %(infile)s
                   | python %(scriptsdir)s/nucmer2bed.py -t bed4 --log=%(outfile)s.log 
                   | mergeBed -i - 
                   | gzip > %(outfile)s'''
    P.run()
def alignmentTargets(genome_files, contig_files):
    '''
    generator object to produce filenames for 
    aligning contigs to known ncbi genomes
    '''
    parameters = []
    for genome, contig in itertools.product(genome_files, contig_files):
        outfile = os.path.join(
            "alignment.dir",
            P.snip(contig, ".contigs.fa") + "_vs_" +
            P.snip(os.path.basename(genome), ".fna")) + ".delta"
        additional_input = add_inputs(contig)
        parameters.append([outfile, genome, contig])
    return parameters
Example #20
def splitFasta( infiles, outfiles):
    '''split fasta file.'''
    
    infile = infiles[0]
    chunk_size = 500
    statement = '''
    cat %(infile)s
    | perl /ifs/devel/andreas/cgat/split_fasta.pl 
       -a blast.dir/chunk_%%s.fasta
       %(chunk_size)i 
    > split.log
    '''
    
    P.run()
def alignContigsToReference(outfile, param1, param2):
    '''
    align the contigs to the reference genomes
    using nucmer
    '''
    to_cluster = True

    reffile, contigfile = param1, param2
    pattern = P.snip(os.path.basename(outfile), ".delta")
    statement = '''nucmer -p %(pattern)s %(reffile)s %(contigfile)s'''
    P.run()
    outf = os.path.basename(outfile)
    statement = '''mv %(outf)s alignment.dir'''
    P.run()
Example #22
def buildMask( infile, outfile ):
    '''build seg mask for protein sequences.'''

    to_cluster = True

    statement = '''
    segmasker -in %(infile)s
              -infmt fasta 
              -parse_seqids 
              -outfmt maskinfo_asn1_bin 
              -out %(outfile)s
    >& %(outfile)s.log
    '''
    P.run()
Example #24
def downloadPFAM( infile, outfiles ):
    '''download the latest PFAM domain sequence set'''
    
    outfile1, outfile2 = outfiles
    statement = '''
    wget -O %(outfile1)s "ftp://ftp.sanger.ac.uk/pub/databases/Pfam/current_release/Pfam-A.fasta.gz";
    '''

    P.run()

    statement = '''
    wget -O %(outfile2)s "ftp://ftp.sanger.ac.uk/pub/databases/Pfam/current_release/Pfam-A.seed.gz";
    '''

    P.run()
Example #25
    def run(self, typeCode, endTime):
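        # Assumed module-level context (not shown in this snippet): plu, utl and
        # cst are project modules/constants, while histInterval, histLag,
        # systemLag and endCode are globals defined elsewhere.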

        plInstance, histData = plu.Pipeline(histInterval), None
        endTimeUNIX = utl.dateToUNIX(endTime)
        startDate = utl.getCurrentDateStr()
        priorDate = utl.datetimeDiff(startDate, 30)
        marketData = (self.ticker, self.tradeQuantity)
        systemData = (endTimeUNIX, histLag, systemLag, plInstance)

        if self.ticker in cst.GDAX_TICKERS:
            gdaxTicker = cst.GDAX_TO_POLONIEX[self.ticker]
            histData = plInstance.getCryptoHistoricalData(
                gdaxTicker, priorDate, startDate)
        else:
            raise ValueError(
                'Bad ticker! Supported tickers are BTC, LTC, ETH.')

        self.generateTechIndObjects(histData)
        sysTuple = (marketData, systemData)

        if typeCode == "BT":
            from Pipeline import indsToDF
            techDF = indsToDF(self.techInds)
            positionData = ()
            return self.loopBacktestLogic(positionData, histData, techDF)

        if typeCode == "PT":
            self.loopPaperTradeLogic(*sysTuple, histData)
            return self.endPaperTrading(endCode, sysTuple)
def loadCoverageData(infile, outfile):
    '''
    load coverage data into database
    '''
    to_cluster = True
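    # Read average per-contig coverage from the loaded table, write it to a
    # temporary TSV, and load that back into the database via P.load().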
    tablename = P.toTable(outfile)
    database = os.path.join(PARAMS["results_resultsdir"], PARAMS["database"])
    dbh = sqlite3.connect(database)
    cc = dbh.cursor()
    temp = P.getTempFile()
    temp.write("contig_id\tacoverage\n")
    for data in cc.execute("""SELECT contig_id, AVG(coverage) FROM %s GROUP BY contig_id""" % tablename).fetchall():
        temp.write("\t".join(map(str, data)) + "\n")
    temp.close()
    P.load(temp.name, outfile)
    os.unlink(temp.name)
Example #27
def prepareDatabase( infiles, outfile ):
    '''prepare the blast database.'''

    fastafile, maskfile = infiles
    to_cluster = True
    statement = '''
    makeblastdb 
            -in %(fastafile)s
            -dbtype prot 
            -parse_seqids
            -mask_data %(maskfile)s
            -out nrdb50
            -title "Uniref Protein Database"
    >& %(outfile)s
    '''
    P.run()
Example #28
def removeBlastUnfinished( infiles, outfile ):
    '''remove aborted blast runs.'''

    deleted = 0

    for infile in infiles:
        line = IOTools.getLastLine( infile )
        
        if not re.search( "job finished", line ):
            fn = infile[:-len(".log")]
            if os.path.exists( fn ):
                P.info("deleting %s" % fn )
                os.unlink( fn )
                deleted += 1

    P.info("deleted %i files" % deleted)
Example #29
    def __init__(self, dbReference, connection):

        super().__init__(dbReference, connection)
        self.formatterInstance = plu.Formatter()
        self.gdaxPublicClient = gdax.PublicClient()
        self.spotDataRef = self.dbReference.table('SpotData')
        self.techIndsRef = self.dbReference.table('TechIndicators')
        self.spotPrice, self.spotVolume = (None, ) * 2
    def test_normalize_row(self):
        input_list = ["1853","1901","1909–27","1800–1900","1867","ca. 1785","1795–1810"]

        for row in input_list:
            date_range = Pipeline.normalize_row(row)
            self.assertEqual(4, len(date_range[0]))
            self.assertEqual(4, len(date_range[1]))
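
        # A minimal sketch (hypothetical, inferred from the assertions above) of
        # the contract assumed for normalize_row:
        #
        #     normalize_row("1909–27")  ->  ("1909", "1927")
        #     normalize_row("ca. 1785") ->  ("1785", "1785")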
def main(start_fold, gpu):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    sess = tf.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras

    GetData = DataGenerator(dataset_mode='lr')
    CV = Pipeline(GetData,
                  DL_model,
                  start_fold,
                  gpu,
                  model_name=MODEL_PATH + 'LSTM_model_lr')
    score = CV.train()
    log.info(f'Model accuracy = {score}')
Example #32
    def get_tree(self, entries_only=False, cython_scope=None):
        from AnalysedTreeTransforms import AutoTestDictTransform
        # The AutoTestDictTransform creates the statement "__test__ = {}",
        # which when copied into the main ModuleNode overwrites
        # any __test__ in user code; not desired
        excludes = [AutoTestDictTransform]

        import Pipeline, ParseTreeTransforms
        context = CythonUtilityCodeContext(self.name)
        context.prefix = self.prefix
        context.cython_scope = cython_scope
        #context = StringParseContext(self.name)
        tree = parse_from_strings(self.name, self.impl, context=context,
                                  allow_struct_enum_decorator=True)
        pipeline = Pipeline.create_pipeline(context, 'pyx', exclude_classes=excludes)

        if entries_only:
            p = []
            for t in pipeline:
                p.append(t)
                if isinstance(t, ParseTreeTransforms.AnalyseDeclarationsTransform):
                    break

            pipeline = p

        transform = ParseTreeTransforms.CnameDirectivesTransform(context)
        # InterpretCompilerDirectives already does a cdef declarator check
        #before = ParseTreeTransforms.DecoratorTransform
        before = ParseTreeTransforms.InterpretCompilerDirectives
        pipeline = Pipeline.insert_into_pipeline(pipeline, transform,
                                                 before=before)

        if self.from_scope:
            def scope_transform(module_node):
                module_node.scope.merge_in(self.from_scope)
                return module_node

            transform = ParseTreeTransforms.AnalyseDeclarationsTransform
            pipeline = Pipeline.insert_into_pipeline(pipeline, scope_transform,
                                                     before=transform)

        (err, tree) = Pipeline.run_pipeline(pipeline, tree, printtree=False)
        assert not err, err
        return tree
Example #33
def run_pipeline(source, options, full_module_name=None, context=None):
    import Pipeline

    source_ext = os.path.splitext(source)[1]
    options.configure_language_defaults(source_ext[1:])  # py/pyx
    if context is None:
        context = options.create_context()

    # Set up source object
    cwd = os.getcwd()
    abs_path = os.path.abspath(source)
    full_module_name = full_module_name or context.extract_module_name(
        source, options)

    if options.relative_path_in_code_position_comments:
        rel_path = full_module_name.replace('.', os.sep) + source_ext
        if not abs_path.endswith(rel_path):
            rel_path = source  # safety measure to prevent printing incorrect paths
    else:
        rel_path = abs_path
    source_desc = FileSourceDescriptor(abs_path, rel_path)
    source = CompilationSource(source_desc, full_module_name, cwd)

    # Set up result object
    result = create_default_resultobj(source, options)

    if options.annotate is None:
        # By default, decide based on whether an html file already exists.
        html_filename = os.path.splitext(result.c_file)[0] + ".html"
        if os.path.exists(html_filename):
            line = codecs.open(html_filename, "r", encoding="UTF-8").readline()
            if line.startswith(u'<!-- Generated by Cython'):
                options.annotate = True

    # Get pipeline
    if source_ext.lower() == '.py' or not source_ext:
        pipeline = Pipeline.create_py_pipeline(context, options, result)
    else:
        pipeline = Pipeline.create_pyx_pipeline(context, options, result)

    context.setup_errors(options, result)
    err, enddata = Pipeline.run_pipeline(pipeline, source)
    context.teardown_errors(err, options, result)
    return result
def plotRelativeAbundanceCorrelations(infiles, outfile):
    '''
    plot the correlation between the estimated 
    relative abundance of species and the true
    relative abundances - done on the shared set
    '''
    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()

    true_file = infiles[0]
    temp = P.getTempFile()
    temp.write("true\testimate\n")
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[
                len("metaphlan_"):] == os.path.basename(true_file):
            tablenames = [
                P.toTable(os.path.basename(true_file)),
                P.toTable(os.path.basename(estimate_file))
            ]
            # get data
            statement = """SELECT a.relab, b.rel_abundance
                           FROM %s as a, %s as b
                           WHERE b.taxon_level == "species"
                           AND a.species_name == b.taxon""" % (tablenames[0],
                                                               tablenames[1])
            for data in cc.execute(statement).fetchall():
                true, estimate = data[0], data[1]
                temp.write("%f\t%f\n" % (true, estimate))
    temp.close()
    print(temp.name)

    inf = temp.name
    R('''data <- read.csv("%s", header = T, stringsAsFactors = F, sep = "\t")'''
      % inf)
    R('''png("%s")''' % outfile)
    main_name = P.snip(outfile, ".png")
    R('''data$estimate <- data$estimate/100''')
    R('''plot(data$estimate, data$true, pch = 16, main = "%s", xlab = "estimated relative abundance", ylab = "observed relative abundance")'''
      % main_name)
    R('''text(0.05, y = 0.35, labels = paste("r = ", round(cor(data$estimate, data$true),2)), cex = 2)'''
      )
    R["dev.off"]()
    os.unlink(inf)
def filterContigsByCoverage(infiles, outfile):
    '''
    filter contigs by their average base coverage
    '''
    fcoverage = PARAMS["coverage_filter"]
    contig_file = infiles[0]
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()
    for infile in infiles[1:]:
        print(contig_file, P.snip(os.path.basename(infile), ".load"))
Example #37
def checkBlastRuns( infiles, outfile ):
    '''check if output files are complete.
    '''
    
    outf = IOTools.openFile( outfile, "w" )

    outf.write( "chunkid\tquery_first\tquery_last\tfound_first\tfound_last\tfound_total\tfound_results\thas_finished\tattempts\t%s\n" %\
                    "\t".join(Logfile.RuntimeInformation._fields))

    for infile in infiles:
        E.debug( "processing %s" % infile)
        chunkid = P.snip( os.path.basename( infile ), ".blast.gz" )
        logfile = infile + ".log"
        chunkfile = P.snip( infile, ".blast.gz" ) + ".fasta"
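
        # Scan the gzipped blast output for the query ids actually present, then
        # compare them against the first/last sequence ids of the input chunk.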

        with IOTools.openFile( infile ) as inf:
            l = inf.readline()
            ids = set()
            total_results = 0
            for l in inf:
                if l.startswith("#//"): continue
                ids.add( int(l.split("\t")[0] ) )
                total_results += 1
            found_first = min(ids)
            found_last = max(ids)
            found_total = len(ids)

        l = IOTools.getFirstLine( chunkfile )
        query_first = l[1:-1]
        l2 = IOTools.getLastLine( chunkfile, nlines = 2).split("\n")
        query_last = l2[0][1:]

        logresults = Logfile.parse( logfile )
        
        outf.write( "\t".join( map(str, (\
                        chunkid, query_first, query_last,
                        found_first, found_last,
                        found_total, total_results,
                        logresults[-1].has_finished,
                        len(logresults),
                        "\t".join( map(str, logresults[-1]) ) ) ) ) + "\n" )
        
    outf.close()
def chimeraTargets(alignment_files, contig_files):
    '''
    generator object to produce filenames for 
    scoring chimericity
    '''
    parameters = []
    for alignment, contig in itertools.product(alignment_files, contig_files):
        outfile = os.path.join("chimeras.dir", P.snip(alignment, ".bam") + ".chimeras")
        parameters.append( [outfile, alignment, contig] )
    return parameters
Example #39
File: Main.py Project: 87/cython
def run_pipeline(source, options, full_module_name=None, context=None):
    import Pipeline

    source_ext = os.path.splitext(source)[1]
    options.configure_language_defaults(source_ext[1:]) # py/pyx
    if context is None:
        context = options.create_context()

    # Set up source object
    cwd = os.getcwd()
    abs_path = os.path.abspath(source)
    full_module_name = full_module_name or context.extract_module_name(source, options)

    if options.relative_path_in_code_position_comments:
        rel_path = full_module_name.replace('.', os.sep) + source_ext
        if not abs_path.endswith(rel_path):
            rel_path = source # safety measure to prevent printing incorrect paths
    else:
        rel_path = abs_path
    source_desc = FileSourceDescriptor(abs_path, rel_path)
    source = CompilationSource(source_desc, full_module_name, cwd)

    # Set up result object
    result = create_default_resultobj(source, options)

    if options.annotate is None:
        # By default, decide based on whether an html file already exists.
        html_filename = os.path.splitext(result.c_file)[0] + ".html"
        if os.path.exists(html_filename):
            line = codecs.open(html_filename, "r", encoding="UTF-8").readline()
            if line.startswith(u'<!-- Generated by Cython'):
                options.annotate = True

    # Get pipeline
    if source_ext.lower() == '.py' or not source_ext:
        pipeline = Pipeline.create_py_pipeline(context, options, result)
    else:
        pipeline = Pipeline.create_pyx_pipeline(context, options, result)

    context.setup_errors(options, result)
    err, enddata = Pipeline.run_pipeline(pipeline, source)
    context.teardown_errors(err, options, result)
    return result
    def run_pipeline(self):
        with open(self.data_file, encoding='utf8') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            line_num = 0


            for row in csv_reader:

                if line_num == 0:

                    self.setup_tables(row, insert_columns)  # Create both tables, using the headers in row 0 as column names
                    line_num += 1
                else:
                    cleaned_row = Pipeline.data_cleanup(row) # Step 1: data cleanup for first column

                    if cleaned_row is not None:  # Step 2: Normalize the date column

                        date_range = Pipeline.normalize_row(cleaned_row[21])

                        insert_stmt = "INSERT INTO data_normalized(" + insert_columns_a + ") VALUES(" \
                                      + '"{0}"'.format('", "'.join(cleaned_row)) + ",\"" \
                                      + date_range[0]+ "\", \"" + date_range[1] +"\")"

                        self.dbobj.insert_table(insert_stmt)

                        Pipeline.running_total(cleaned_row, self.running_total_dict) # Step 3: Calculate running_total

                    line_num += 1
                    #if line_num == 2000:
                        #break;

            # Populate classification_totals table with calculated running totals
            for classification, totals in self.running_total_dict.items():
                insert_stmt = "INSERT INTO classification_totals(Classification, Totals) VALUES (\"" \
                              + classification +"\",\""+ str(totals) +"\")"

                self.dbobj.insert_table(insert_stmt)

            self.dbobj.commit()
            #self.dbobj.select_table("data_normalized")
            #self.dbobj.select_table("classification_totals")
            self.dbobj.close_connection()
Example #41
def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    interaction_file = str(Path("Papers/1-s2.0-S1097276516305214-mmc3.xlsx"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    organisms = ["Celegans"]
    for organism in organisms:
        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) /
                Path("Pairing_Beyond_Seed_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json(
            'paper',
            "Pairing beyond the Seed Supports MicroRNA Targeting Specificity")
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json(
            'paper_url',
            "https://www.sciencedirect.com/science/article/pii/S1097276516305214#mmc3"
        )

        ce = Pairing_Beyond_Seed(input_file=interaction_file,
                                 organism=organism,
                                 tmp_dir=tmp_dir,
                                 debug=debug)
        ce.run()

        p = Pipeline(paper_name="Pairing_Beyond_Seed",
                     organism=organism,
                     in_df=ce.prepare_for_pipeline(),
                     tmp_dir=tmp_dir)
        p.run()
Example #42
def main():
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        debug = True

    if debug:
        print("***************************************\n"
              "\t\t\t DEBUG \n"
              "***************************************\n")

    interaction_file = str(Path("Papers/41598_2017_7880_MOESM4_ESM.csv"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    organisms = ["Cow"]
    for organism in organisms:
        JsonLog.set_filename(
            utils.filename_date_append(
                Path(log_dir) /
                Path("Global_Mapping_Cattle_" + organism + ".json")))
        JsonLog.add_to_json('file name', interaction_file)
        JsonLog.add_to_json(
            'paper',
            "Global mapping of miRNA-target interactions in cattle (Bos taurus)"
        )
        JsonLog.add_to_json('Organism', organism)
        JsonLog.add_to_json(
            'paper_url',
            "https://www.nature.com/articles/s41598-017-07880-8#MOESM1")

        cow = Global_Mapping_Cattle(input_file=interaction_file,
                                    tmp_dir=tmp_dir,
                                    debug=debug)

        cow.run()

        p = Pipeline(paper_name="Global_Mapping_Cattle",
                     organism=organism,
                     in_df=cow.prepare_for_pipeline(),
                     tmp_dir=tmp_dir)
        p.run()
def alignmentTargets(genome_files, contig_files):
    '''
    generator object to produce filenames for 
    aligning contigs to known ncbi genomes
    '''
    parameters = []
    for genome, contig in itertools.product(genome_files, contig_files):
        outfile = os.path.join("alignment.dir", P.snip(contig, ".contigs.fa") + "_vs_"  + P.snip(os.path.basename(genome), ".fna")) + ".delta"
        additional_input = add_inputs(contig)
        parameters.append( [outfile, genome, contig] )
    return parameters
Example #44
def mergeBlast( infiles, outfile ):
    '''merge blast results into a single file.'''

    to_cluster = True
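
    # The per-chunk blast files are ordered numerically by chunk index before
    # merging, so output rows keep chunk order.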

    files = [(int(re.match(r".*chunk_(\d+).blast.gz", x).groups()[0]), x)
             for x in infiles]
    files.sort()

    files = " ".join([x[1] for x in files])

    statement = '''zcat %(files)s | awk '$1 == "query_nid" { if(a){ next;} a=1; } {print}' | gzip > %(outfile)s'''
    P.run()

    files = [(int(re.match(r".*chunk_(\d+).blast.gz.log", x).groups()[0]), x)
             for x in infiles]
    files.sort()

    files = " ".join([x[1] for x in files])

    statement = '''cat %(files)s >> %(outfile)s.log'''
    P.run()
    P.run()
Example #45
def inverse_kinematics(block_real_position, s):
    put_index = 0
    for key in block_real_position:
        grab_position = block_real_position[key]
        if grab_position[2] > 0:
            grab_roll = grab_position[2] - 180
        else:
            grab_roll = grab_position[2]
        grab_roll = grab_roll / 180 * math.pi
        grab_position = [grab_position[0][0] - 5, grab_position[0][1], 1]
        put_position = block_put_position[put_index]
        put_roll = put_position[3]
        put_position = put_position[0:3]
        commands = Pipeline.classical_combi(grab_position, put_position, grab_roll, put_roll, s)
        Pipeline.C_execute(commands)
        put_index += 1
    HOME_POSITION = [20, -15, 20]
    END_POSITION = [40, 20, 20]
    traj = RPC.pipline_position_encoder(HOME_POSITION, END_POSITION, s)
    Pipeline.C_execute([traj])
Example #46
def main(start_fold, gpu, batch, add_trend, freq_enc):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    sess = tf.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras
    if add_trend:
        log.info('Will add trend to XEEK Train data')
    GetData = DataGenerator(add_trend=add_trend, dataset_mode='ud')
    CV = Pipeline(GetData,
                  DL_model,
                  start_fold,
                  gpu,
                  batch,
                  model_name=MODEL_PATH + 'LSTM_model_ud')
    score = CV.train(freq_encoder=freq_enc)
    log.info(f'Model accuracy = {score}')
Example #47
    def __init__(self):
        self.UrlAndIDContr = URLSchedul.UrlManager()
        self.downloader = Download.Downloader()
        self.parser = Html_pareser.HtmlPare()
        self.ProceClean = Pipeline.pinline()
        self.outjson = FeeDExport.FeedExp()
        self.CollectAllData = {}
        self.errGeoGet = []
        msgKeys = ["geo_code", "latitude", "longitude", "nation", "province", "city", "district", "street", "street_number"]
        for k in msgKeys:
            self.CollectAllData[k] = []
Example #48
def mapSCOP( infile, outfile ):
    '''map scop against sequence database.
    '''

    to_cluster = True
    max_evalue = 0.00001
    num_results = 100
    mask = 21
    # upper case is critical, otherwise traceback fails!?
    matrix = "BLOSUM50"
    gop = 12
    gep = 2
    dbname = "/tmp/blast/nrdb50"
    num_jobs = 8
    
    job_options = '-pe dedicated %i -R y' % num_jobs

    statement = '''
    /ifs/devel/andreas/cgat/run.py --log=%(outfile)s.log
    'blastp
       -query %(infile)s
       -db %(dbname)s
       -evalue %(max_evalue)f
       -num_alignments %(num_results)i
       -num_descriptions %(num_results)i
       -db_soft_mask %(mask)i
       -matrix %(matrix)s
       -gapopen %(gop)i
       -gapextend %(gep)i
       -num_threads %(num_jobs)i
       -outfmt "6 qseqid qstart qend sseqid sstart send evalue bitscore pident score qseq sseq"
    | python /ifs/devel/andreas/cgat/blast2table.py 
        --alignment-format=blocks
    | gzip
    > %(outfile)s';
    checkpoint;
    echo "#//" | gzip >> %(outfile)s
    '''

    P.run()
def collectGenomeSizes(infile, outfile):
    '''
    output the genome sizes for each genome
    '''
    to_cluster = True
    outf = open(outfile, "w")
    outf.write("genome\tlength\n")
    # assume single fasta entry
    for fasta in FastaIterator.iterate(IOTools.openFile(infile)):
        name = P.snip(os.path.basename(infile), ".fna")
        length = len(list(fasta.sequence))
        outf.write("%s\t%s\n" % (name, str(length)))
    outf.close()
Example #50
    def runTest(self):
        alignments, alignments_score = self.aligner.align(
            self.p_str_tokens, self.h_str_tokens, self.weights)

        # print('Alignments:\n')
        for a in alignments:
            print(a)

        prediction = Pipeline.get_entailment(
            self.p_str_tokens, self.h, alignments)
        logging.info('Target: %s' % self.target)
        logging.info('Prediction: %s' % prediction)
        self.assertEqual(prediction, self.target)
def calculateFalsePositiveRate(infiles, outfile):
    '''
    calculate the false positive rate in taxonomic
    abundances
    '''

    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()

    true_file = infiles[0]
    true_set = set()
    estimate_set = set()
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[len("metaphlan_"):] == os.path.basename(true_file):
            tablenames = [P.toTable(os.path.basename(true_file)), P.toTable(os.path.basename(estimate_file))]

            for species in cc.execute("""SELECT species_name FROM %s""" % tablenames[0]).fetchall():
                true_set.add(species[0])
            for species in cc.execute("""SELECT taxon FROM %s WHERE taxon_level == 'species'""" % tablenames[1]).fetchall():
                if species[0].find("_unclassified") != -1: continue
                estimate_set.add(species[0])
    
    total_estimate = len(estimate_set)
    total_true = len(true_set)

    E.info("counting false positives and false negatives")
    print(estimate_set.difference(true_set))
    nfp = len(estimate_set.difference(true_set))
    nfn = len(true_set.difference(estimate_set))
    ntp = len(estimate_set.intersection(true_set))

    E.info("writing results")
    track = P.snip(os.path.basename(true_file), ".load")
    outf = open(outfile, "w")
    outf.write("track\ttp_rate\tfp_rate\tfn_rate\n")
    outf.write("\t".join(map(str, [track, float(ntp)/total_estimate, float(nfp)/total_estimate, float(nfn)/total_true])) + "\n")
    outf.close()
Example #52
def runBlast( infile, outfile ):
    '''run blast
    '''
    
    to_cluster = True
    max_evalue = 1.0
    num_results = 1000000
    mask = 21
    dbsize = 1500000000
    # upper case is critical, otherwise traceback fails!?
    matrix = "BLOSUM50"
    gop = 12
    gep = 2
    dbname = "/tmp/blast/nrdb50"

    statement = '''
    /ifs/devel/andreas/cgat/run.py --log=%(outfile)s.log
    'blastp
       -query %(infile)s
       -db %(dbname)s
       -evalue %(max_evalue)f
       -num_alignments %(num_results)i
       -num_descriptions %(num_results)i
       -db_soft_mask %(mask)i
       -matrix %(matrix)s
       -gapopen %(gop)i
       -gapextend %(gep)i
       -outfmt "6 qseqid qstart qend sseqid sstart send evalue bitscore pident score qseq sseq"
    | python /ifs/devel/andreas/cgat/blast2table.py 
        --alignment-format=blocks
    | gzip
    > %(outfile)s';
    checkpoint;
    echo "#//" | gzip >> %(outfile)s
    '''

    P.run()
Example #53
    def runTest(self):
        start = time()
        alignments, alignments_score = self.aligner.align(
            self.p_str_tokens, self.h_str_tokens, self.weights)
        print("Alignment %s" % (time() - start))
        # print('Alignments:\n')
        for a in alignments:
            print(a)

        prediction = Pipeline.get_entailment(
            self.p_str_tokens, self.h, alignments)
        logging.info('Target: %s' % self.target)
        logging.info('Prediction: %s' % prediction)
        print('Answer: %s' % self.answer[prediction])
        self.assertEqual(prediction, self.target)
    def get_tree(self):
        from AnalysedTreeTransforms import AutoTestDictTransform
        # The AutoTestDictTransform creates the statement "__test__ = {}",
        # which when copied into the main ModuleNode overwrites
        # any __test__ in user code; not desired
        excludes = [AutoTestDictTransform]

        import Pipeline, ParseTreeTransforms
        context = CythonUtilityCodeContext(self.name)
        context.prefix = self.prefix
        #context = StringParseContext(self.name)
        tree = parse_from_strings(self.name, self.pyx, context=context)
        pipeline = Pipeline.create_pipeline(context, 'pyx', exclude_classes=excludes)

        transform = ParseTreeTransforms.CnameDirectivesTransform(context)
        # InterpretCompilerDirectives already does a cdef declarator check
        #before = ParseTreeTransforms.DecoratorTransform
        before = ParseTreeTransforms.InterpretCompilerDirectives
        pipeline = Pipeline.insert_into_pipeline(pipeline, transform,
                                                 before=before)

        (err, tree) = Pipeline.run_pipeline(pipeline, tree)
        assert not err, err
        return tree
def buildAlignmentSizes(infiles, outfile):
    '''
    use bed files to sum the total number of bases
    that are aligned to the genomes
    '''
    outf = open(outfile, "w")
    outf.write("genome\tsize\n")
    for infile in infiles:
        genome = P.snip(os.path.basename(infile), ".bed.gz")
        c = 0
        inf = IOTools.openFile(infile)
        for bed in Bed.iterator(inf):
            c += bed.end - bed.start
        outf.write("%s\t%s\n" % (genome, str(c)))
    outf.close()
def plotRelativeAbundanceCorrelations(infiles, outfile):
    '''
    plot the correlation between the estimated 
    relative abundance of species and the true
    relative abundances - done on the shared set
    '''
    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()

    true_file = infiles[0]
    temp = P.getTempFile()
    temp.write("true\testimate\n")
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[len("metaphlan_"):] == os.path.basename(true_file):
            tablenames = [P.toTable(os.path.basename(true_file)), P.toTable(os.path.basename(estimate_file))]
            # get data
            statement = """SELECT a.relab, b.rel_abundance
                           FROM %s as a, %s as b
                           WHERE b.taxon_level == "species"
                           AND a.species_name == b.taxon""" % (tablenames[0], tablenames[1])
            for data in cc.execute(statement).fetchall():
                true, estimate = data[0], data[1]
                temp.write("%f\t%f\n" % (true, estimate))
    temp.close()
    print(temp.name)

    inf = temp.name
    R('''data <- read.csv("%s", header = T, stringsAsFactors = F, sep = "\t")''' % inf)
    R('''png("%s")''' % outfile)
    main_name = P.snip(outfile, ".png")
    R('''data$estimate <- data$estimate/100''')
    R('''plot(data$estimate, data$true, pch = 16, main = "%s", xlab = "estimated relative abundance", ylab = "observed relative abundance")''' % main_name)
    R('''text(0.05, y = 0.35, labels = paste("r = ", round(cor(data$estimate, data$true),2)), cex = 2)''')
    R["dev.off"]()
    os.unlink(inf)
Example #57
###################################################################
###################################################################
###################################################################
# Run configuration script

import glob
import itertools

from SphinxReport.Utils import PARAMS as P

EXPORTDIR = P['medip_exportdir']
DATADIR = P['medip_datadir']
DATABASE = P['medip_backend']

###################################################################
# cf. pipeline_medip.py
# This should be automatically gleaned from pipeline_chipseq.py
###################################################################
import Pipeline
PARAMS_PIPELINE = Pipeline.peekParameters( ".",
                                           "pipeline_medip.py" )

import PipelineTracks

Sample = PipelineTracks.Sample3

suffixes = ["export.txt.gz",
            "sra",
            "fastq.gz",
            "fastq.1.gz",
            "csfasta.gz" ]

TRACKS = sum( itertools.chain( [ PipelineTracks.Tracks( Sample ).loadFromDirectory( 
                [ x for x in glob.glob( "%s/*.%s" % (DATADIR, s) ) if "input" not in x],
                "%s/(\S+).%s" % (DATADIR, s) ) for s in suffixes ] ), 
              PipelineTracks.Tracks( Sample ) )
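# TRACKS now holds every non-input data file matching the suffixes above,
# keyed by filename stem.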
def runCharacterize(cwd, rabin, refcmap, contigdir, contigbase, runaligns, xmappath, optargs, nthreads):
    '''Load Pipeline files from first arg; configure CharacterizeModule; run alignments if runaligns;
    report on those alignments or the xmap provided as xmappath.
    '''

    printargs = True

    if not os.path.isfile(os.path.join(cwd, "utilities.py")):
        print("utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir")
        sys.exit(1)
    import utilities as util

    if not util.checkFile(os.path.join(cwd, "Pipeline.py")):
        print("Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir")
        sys.exit(1)
    import Pipeline

    if not util.checkFile(os.path.join(cwd, "CharacterizeModule.py")):
        print("CharacterizeModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir")
        sys.exit(1)
    import CharacterizeModule as cm

    #if not util.checkFile(os.path.join(cwd,"MapClassesRev.py")):
    #    print "MapClassesRev.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    #    sys.exit(1)
    #import MapClassesRev

    #use Pipeline objects

    varsP = Pipeline.varsPipeline()

    varsP.optArgumentsFileIn   = optargs
    varsP.RefAlignerBin        = rabin
    varsP.latestMergedCmap     = os.path.join(contigdir, contigbase+".cmap") #file suffix required to be .cmap
    varsP.contigFolder         = contigdir
    varsP.nThreads             = nthreads #necessary otherwise job won't start
    varsP.ref                  = refcmap
    varsP.stdoutlog            = True #enable -stdout -stderr args to RefAligner
    varsP.curCharacterizeCmaps = [varsP.latestMergedCmap]

    if runaligns :
        varsP.contigAlignTarget = contigdir+"/alignref" #this is output dir
        varsP.runSV = False
        varsP.groupContigs = False
        varsP.stageComplete = contigbase
        varsP.outputContigFolder = contigdir
        varsP.memoryLogpath  = os.path.join(contigdir, "memory_log.txt")
        varsP.pipeReportFile = os.path.join(contigdir, "pipeReport.txt")
        varsP.parseArguments() #parses optArgumentsFile
        if printargs :
            print("\nRunning Characterization with arguments:\n" + " ".join(varsP.argsListed('characterizeDefault')) + '\n')
        if hasattr(util, "InitStatus") : #if old version, skip
            util.InitStatus(os.path.join(contigdir, "status.xml")) #needed otherwise call to status_log fails
        charmod = cm.Characterize(varsP) #create Characterize object from CharacterizeModule -- this also calls generateJobList
        xmappath = charmod.xmapTarget #set in Characterize.generateJobList
        charmod.runJobs()
    else :
        #varsP.contigAlignTarget = contigdir #this is dir in which _q and _r cmaps must be located -- contigdir is from cmap; this should be from xmap
        varsP.contigAlignTarget = os.path.split(xmappath)[0]
        print("Loading alignments from\n" + xmappath + "\n")

    #no longer using this in Pipeline
    #print MapClassesRev.TopLevelCharacterization(varsP, [os.path.join(varsP.contigAlignTarget, contigbase)])

    print(cm.characterizeContigs(varsP, xmappath))