Esempio n. 1
0
 def _para(self, *paraArgs):
     """Run the para command on the remote host through ssh.

     The remote command changes into self.paraDir and then runs para with
     paraArgs joined by single spaces.  Nothing is quoted, so the remote
     shell interprets any metacharacters in the directory or arguments.
     The ssh invocation is reported on stderr before it is run.

     Returns stdout as a list of lines; stderr is carried in the
     ProcException raised if the remote program encounters an error."""
     argStr = " ".join(paraArgs)
     remCmd = "cd " + self.paraDir + "; para " + argStr
     sshCmd = ["ssh", "-o", "ClearAllForwardings=yes", self.paraHost, remCmd]
     fileOps.prLine(sys.stderr, "ssh ", self.paraHost, " ", remCmd)
     return procOps.callProcLines(sshCmd)
def build_attributes(database, name, out_dir):
    """
    Write a tab-separated attributes table for the Ensembl transcripts of a database.

    Three queries are run through ``hgsql`` against ``database``:
    ``ensemblSource``, ``ensemblToGeneName`` and the ``transcript, gene``
    columns of ``ensGtp``.  Each returned line is split on whitespace and must
    yield exactly two fields, which become a key/value pair.

    One row is produced per transcript found in ``ensGtp``.  The gene name is
    looked up by transcript ID and defaults to ``'NoName'``; the value from
    ``ensemblSource`` is written to both the GeneType and TranscriptType
    columns.  Rows are written in gene ID order beneath a header line.

    :param database: database name passed to hgsql.
    :param name: base name of the output file; ``.tsv`` is appended.
    :param out_dir: directory the output file is written to.
    :raises KeyError: if a transcript in ensGtp has no entry in ensemblSource.
    """
    header = '\t'.join(['GeneId', 'GeneName', 'GeneType', 'TranscriptId', 'TranscriptType']) + '\n'
    source_cmd = ['hgsql', '-Ne', 'select * from ensemblSource', database]
    source = dict(x.split() for x in callProcLines(source_cmd))
    genes_cmd = ['hgsql', '-Ne', 'select * from ensemblToGeneName', database]
    genes = dict(x.split() for x in callProcLines(genes_cmd))
    transcripts_cmd = ['hgsql', '-Ne', 'select transcript, gene from ensGtp', database]
    transcripts = dict(x.split() for x in callProcLines(transcripts_cmd))
    rows = []
    # items() is available on both Python 2 and 3; iteritems() is Python 2 only.
    for transcript_id, gene_id in transcripts.items():
        gene_name = genes.get(transcript_id, 'NoName')
        biotype = source[transcript_id]
        rows.append([gene_id, gene_name, biotype, transcript_id, biotype])
    # Stable sort on the gene ID column only, as before.
    rows.sort(key=lambda row: row[0])
    with open(os.path.join(out_dir, name + '.tsv'), 'w') as outf:
        outf.write(header)
        for row in rows:
            outf.write('\t'.join(row) + '\n')
def extract_newick_genomes_cactus(hal):
    """
    Get the newick tree and the genome names from a HAL file.

    Runs ``halStats --tree`` on ``hal`` and takes the first output line as the
    newick string, which is parsed with ete3 (format=1) to list its leaves.

    :param hal: HAL file argument handed to halStats.
    :return: (newick, genomes), where genomes is a tuple of the tree's leaf names.
    """
    tree_lines = callProcLines(['halStats', '--tree', hal])
    newick = tree_lines[0]
    leaf_names = ete3.Tree(newick, format=1).get_leaf_names()
    return newick, tuple(leaf_names)
Esempio n. 4
0
 def _para(self, *paraArgs):
     """Invoke para on the remote machine via ssh.

     paraArgs are joined with spaces and appended to a remote command that
     first changes into self.paraDir.  No quoting is applied, so the
     remote shell sees the arguments exactly as given.  The ssh
     invocation is reported on stderr before it is run.

     Returns stdout as a list of lines; if the remote program encounters
     an error, its stderr is available in the ProcException."""
     paraCmd = "; para " + " ".join(paraArgs)
     remCmd = "cd " + self.paraDir + paraCmd
     fileOps.prLine(sys.stderr, "ssh ", self.paraHost, " ", remCmd)
     sshArgv = ["ssh", "-o", "ClearAllForwardings=yes"]
     sshArgv += [self.paraHost, remCmd]
     return procOps.callProcLines(sshArgv)
Esempio n. 5
0
 def testCallLines(self):
     # Run sort on the simple1.txt input file and compare the lines that
     # callProcLines returns with the expected sorted list.
     expected = ['five', 'four', 'one', 'six', 'three', 'two']
     sortCmd = ["sort", self.getInputFile("simple1.txt")]
     self.assertEqual(procOps.callProcLines(sortCmd), expected)
Esempio n. 6
0
 def testCallLines(self):
     # Run sort on the simple1.txt input file and check the lines that
     # callProcLines returns.
     out = procOps.callProcLines(["sort", self.getInputFile("simple1.txt")])
     # assertEqual replaces failUnlessEqual, a deprecated unittest alias
     # that was removed in Python 3.12.
     self.assertEqual(out,
                      ['five', 'four', 'one', 'six', 'three', 'two'])
Esempio n. 7
0
def halStats(args):
    """Run halStats with the given argument list and return its output lines.

    args must be a list; it is appended to the program name to form the
    command.  The result of procOps.callProcLines is copied into a new list.
    """
    cmd = ["halStats"] + args
    return list(procOps.callProcLines(cmd))
def halStats(args):
    """Call halStats with the specified list of arguments.

    Returns the output of procOps.callProcLines for that command as a list
    of lines.
    """
    outputLines = procOps.callProcLines(["halStats"] + args)
    return list(outputLines)
           frozenset(["Notch2NL-D", "Notch2NL-C"]): [[15867, 74916]],
           frozenset(): [[81069, 162368], [165397, 2000000]]}



f = Fasta("stitched_alignment.fa")
results = {}
# Run the jar once per set of excluded sequences and keep its non-comment
# output records, keyed by the excluded set.
for exclude in [frozenset(), frozenset(["Notch2NL-D"]), frozenset(["Notch2NL-D", "Notch2NL-C"])]:
    # Write every sequence that is not excluded to a scratch FASTA file; the
    # context manager closes the file even if a write fails.
    with open("tmp.fasta", "w") as t:
        for para in sorted(set(f.keys()) - exclude):
            t.write(">{}\n{}\n".format(para, f[para]))
    # Value passed to -R: the excluded names joined by '_', or 'all' when
    # nothing is excluded.
    n = '_'.join(sorted(exclude)) if exclude else 'all'
    cmd = ['java', '-jar', '/cluster/home/ifiddes/jvarkit/dist-1.133/biostar94573.jar', '-R', n,
           'tmp.fasta']
    r = callProcLines(cmd)
    # Drop lines starting with '#' and split the rest on whitespace.
    recs = [x.split() for x in r if not x.startswith("#")]
    results[exclude] = recs


# For each excluded set, keep only the records whose second field, read as an
# integer, lies in one of that set's (start, stop] intervals.
# items() works on both Python 2 and 3; iteritems() is Python 2 only.
raw_recs = []
for exclude, region in regions.items():
    for start, stop in region:
        raw_recs.extend([x for x in results[exclude] if start < int(x[1]) <= stop])


# region with poor alignment
exclude_regions = [[28574, 31093]]
exclude_regions = [ChromosomeInterval('a', x[0], x[1], '.') for x in exclude_regions]
recs = []
for r in raw_recs:
def map_to_ref(fwd_filtered, chain):
    """
    Map alignments through a chain with pslMap, filter them, and return the result.

    pslMap is run with -swapMap and -chainMapFile on fwd_filtered and chain,
    writing to a file obtained from tmpFileGet().  pslCDnaFilter then reads
    that file and writes to /dev/stdout; the value returned is whatever
    callProcLines returns for that filter command.
    """
    mapped_psl = tmpFileGet()
    runProc(['pslMap', '-swapMap', '-chainMapFile', fwd_filtered, chain, mapped_psl])
    filter_cmd = ['pslCDnaFilter', '-localNearBest=0.05', '-filterWeirdOverlapped', '-decayMinCover']
    filter_cmd += [mapped_psl, '/dev/stdout']
    return callProcLines(filter_cmd)