コード例 #1
0
ファイル: sopt.py プロジェクト: alexei-matveev/pts
def test1(n):
    """Optimize a path between two equilibria of the Ar4 LJ cluster.

    Samples n points from an initial path through A, C and B (given in
    the coordinates of the c2v_tetrahedron1 Func), then prints energies
    and volumes of the path points before and after soptimize().

    n -- number of points sampled from the initial path, at evenly spaced
         parameter values between 0 and 1.

    Prints to stdout and returns nothing.  soptimize and tangent1 are not
    imported here and are expected to be available at module level.
    """
    from numpy import array

    # One   equilibrium  of   Ar4  LJ   cluster  (in   coordinates  of
    # c2v_tetrahedron1 Func):
    w = 0.39685026
    A = array([w, w, +w])

    # Another equilibrium:
    B = array([w, w, -w])

    # Halfway between A and B:
    C = (A + B) / 2.0
    # ... but the exact midpoint is immediately replaced by a slightly
    # perturbed point:
    C = array([w + 0.01, w - 0.01, 0.0])

    xs = array([A, C, B])

    from test.testfuns import c2v_tetrahedron1, diagsandhight
    from path import MetricPath
    from metric import Metric
    from numpy import linspace

    z = c2v_tetrahedron1()

    # z = diagsandhight()
    # r = 1.12246195815
    # A = array([r, r,  r / sqrt(2.)])
    # B = array([r, r, -r / sqrt(2.)])
    # C = array([r, r * sqrt(2.), 0.])
    # xs = array([A, C, B])

    # Path through A, C, B; the second argument is the norm_up method of
    # a Metric built on z:
    p = MetricPath(xs, Metric(z).norm_up)

    # Initial guess: the path evaluated at n parameter values in [0, 1].
    x0 = map(p, linspace(0., 1., n))

    from ase import Atoms
    from qfunc import QFunc
    from func import compose

    # Energy function: QFunc of an "Ar4" Atoms object composed with z.
    pes = compose(QFunc(Atoms("Ar4")), z)

    from rc import Volume

    # Volume composed with z; printed in callback() and passed to
    # soptimize() as rc=.
    vol = compose(Volume(), z)

    def callback(x, e, g, t):
        """Print the energies e and the volume at every point of x.

        g and t are accepted but not used.
        """
        # from pts.tools.jmol import jmol_view_path
        print "energies=", e # map(pes, x)
        print "volume=", map(vol, x)
        # jmol_view_path(map(z, x), syms=["Ar"]*4, refine=1)
        pass

    print "BEFORE:"
    callback(x0, map(pes, x0), map(pes.fprime, x0), None)

    x1, info = soptimize(pes, x0, tangent1, rc=vol, callback=callback)
    # print "info=", info

    print "AFTER:"
    callback(x1, map(pes, x1), map(pes.fprime, x1), None)
コード例 #2
0
ファイル: sopt.py プロジェクト: chrinide/pts
def test1(n):
    """Run soptimize() on an n-point path between two Ar4 LJ equilibria.

    The initial path goes through the points A, C, B defined below (in
    the coordinates of the c2v_tetrahedron1 Func).  Energies and volumes
    of the path points are printed before and after the optimization.

    n -- how many points to sample from the initial path (evenly spaced
         path parameter in [0, 1]).

    Nothing is returned.  soptimize and tangent1 are not imported in this
    function and must be provided by the enclosing module.
    """
    from numpy import array

    # One   equilibrium  of   Ar4  LJ   cluster  (in   coordinates  of
    # c2v_tetrahedron1 Func):
    w = 0.39685026
    A = array([w, w, +w])

    # Another equilibrium:
    B = array([w, w, -w])

    # Halfway between A and B:
    C = (A + B) / 2.0
    # The midpoint computed above is overwritten right away by a point
    # shifted by +/- 0.01 in the first two coordinates:
    C = array([w + 0.01, w - 0.01, 0.0])

    xs = array([A, C, B])

    from test.testfuns import c2v_tetrahedron1, diagsandhight
    from path import MetricPath
    from metric import Metric
    from numpy import linspace

    z = c2v_tetrahedron1()

    # z = diagsandhight()
    # r = 1.12246195815
    # A = array([r, r,  r / sqrt(2.)])
    # B = array([r, r, -r / sqrt(2.)])
    # C = array([r, r * sqrt(2.), 0.])
    # xs = array([A, C, B])

    # MetricPath over the three points, using Metric(z).norm_up:
    p = MetricPath(xs, Metric(z).norm_up)

    # n points along the path serve as the starting geometry.
    x0 = map(p, linspace(0., 1., n))

    from ase import Atoms
    from qfunc import QFunc
    from func import compose

    # Function to optimize on: QFunc(Atoms("Ar4")) composed with z.
    pes = compose(QFunc(Atoms("Ar4")), z)

    from rc import Volume

    # Volume() composed with z, handed to soptimize() through rc= and
    # also reported by callback().
    vol = compose(Volume(), z)

    def callback(x, e, g, t):
        """Report energies e and the volumes of the points x (g, t unused)."""
        # from pts.tools.jmol import jmol_view_path
        print "energies=", e  # map(pes, x)
        print "volume=", map(vol, x)
        # jmol_view_path(map(z, x), syms=["Ar"]*4, refine=1)
        pass

    print "BEFORE:"
    callback(x0, map(pes, x0), map(pes.fprime, x0), None)

    x1, info = soptimize(pes, x0, tangent1, rc=vol, callback=callback)
    # print "info=", info

    print "AFTER:"
    callback(x1, map(pes, x1), map(pes.fprime, x1), None)
コード例 #3
0
ファイル: sequenceframes.py プロジェクト: demis001/biopandas
def fqframe(fileh):
    """Assemble the pieces needed to read a FASTQ file record by record.

    fileh -- open FASTQ file handle; it must expose a ``name`` attribute.

    Returns a dict with the keys:
        'obj_func'    -- zero-argument callable that returns the next
                         record of get_fastq(fileh)
        'columns'     -- tuple of column names
        'getters'     -- list of per-record extractor callables, in the
                         same order as 'columns'
        'validator'   -- Schema describing one row
        'dictgetters' -- always None
    """
    row_schema = Schema({
        'id': str,
        'seq': str,
        'quality': str,
        'qual_ints': check_np_type('int64'),
        'error': check_np_type('float64'),
        'description': str
    })

    # Shared by the 'qual_ints' and 'error' columns.
    read_phreds = compose_all(np.array, itemgetter('phred_quality'),
                              attr('_per_letter_annotations'))

    # (column name, extractor) pairs keep names and getters aligned.
    extractors = (
        ('id', attr('id')),
        ('seq', compose(str, attr('seq'))),
        ('quality', SeqIO.QualityIO._get_sanger_quality_str),
        ('description', attr('description')),
        ('qual_ints', read_phreds),
        ('error', compose(error, read_phreds)),
    )
    columns = tuple(name for name, _ in extractors)
    getters = [extract for _, extract in extractors]
    assert len(getters) == len(columns)

    # Not used below, but reading fileh.name here makes a handle without
    # a name fail before any parsing starts.
    metadata = {'filename': fileh.name}

    next_record = partial(next, get_fastq(fileh))

    return {
        'obj_func': next_record,
        'columns': columns,
        'getters': getters,
        'validator': row_schema,
        'dictgetters': None
    }
コード例 #4
0
def fqframe(fileh):
    """Build the reader components for a FASTQ file handle.

    fileh -- open FASTQ file handle with a ``name`` attribute.

    The result is a dict:
        'obj_func'    -- callable taking no arguments; each call returns
                         the next record from get_fastq(fileh)
        'columns'     -- tuple of column names
        'getters'     -- list of extractor callables, one per column and
                         in column order
        'validator'   -- Schema for a single row
        'dictgetters' -- None
    """
    schema = Schema({
        'id' : str,
        'seq' : str,
        'quality' : str,
        'qual_ints' : check_np_type('int64'),
        'error' : check_np_type('float64'),
        'description' : str
    })

    # Extractor reused for both the 'qual_ints' and the 'error' column.
    phred_array = compose_all(np.array, itemgetter('phred_quality'), attr('_per_letter_annotations'))

    # One (name, getter) pair per column, unzipped below.
    pairs = [
        ('id', attr('id')),
        ('seq', compose(str, attr('seq'))),
        ('quality', SeqIO.QualityIO._get_sanger_quality_str),
        ('description', attr('description')),
        ('qual_ints', phred_array),
        ('error', compose(error, phred_array)),
    ]
    columns, getter_tuple = zip(*pairs)
    getters = list(getter_tuple)
    assert len(getters) == len(columns)

    # Unused afterwards; the attribute access still requires fileh.name
    # to exist, exactly as before.
    metadata = {'filename' : fileh.name}

    records = get_fastq(fileh)
    result = {}
    result['obj_func'] = partial(next, records)
    result['columns'] = columns
    result['getters'] = getters
    result['validator'] = schema
    result['dictgetters'] = None
    return result
コード例 #5
0
ファイル: samframe.py プロジェクト: demis001/biopandas
def parse_cigar(cigar_str):
    """Summarize a CIGAR string as per-operation totals.

    Every "<count><op>" pair found in cigar_str (op being one of
    MIDNSHPX=) contributes its count to the entry "cigar_<op>".  The
    entry 'cigar_score' is the sum of all totals except those of
    "cigar_M" and "cigar_=".

    Returns the result of merge_dicts() applied to the per-operation
    dict and {'cigar_score': ...}.
    """
    # The lazy "+?" makes every match cover a single (count, op) pair.
    pattern = re.compile(r'(?:([0-9]+)([MIDNSHPX=]))+?')
    by_op = itemgetter(1)
    pairs = sorted(pattern.findall(cigar_str), key=by_op)

    # Converts the count of each pair to int and adds them up.
    total_count = compose(sum, pmap(compose(int, itemgetter(0))))

    cigar_dict = {}
    for op_code, members in groupby(pairs, by_op):
        cigar_dict["cigar_{0}".format(op_code)] = total_count(members)

    non_matching = 0
    for label, count in cigar_dict.items():
        if label not in ['cigar_M', 'cigar_=']:
            non_matching += count
    return merge_dicts(cigar_dict, {'cigar_score': non_matching})
コード例 #6
0
ファイル: sequenceframes.py プロジェクト: demis001/biopandas
def fqframe(fileh):
    """Return a FastqFrame namedtuple of two closures over fileh.

    fileh -- FASTQ file handle, passed to get_fastq() by load_fastq.

    The returned namedtuple has the fields:
        get_row    -- get_row(record): build and validate one row dict
        load_fastq -- load_fastq(): read all records into a DataFrame
                      indexed by 'id'
    """
    # Expected type of every column of a row.
    final_schema = Schema({
        'id': str,
        'seq': str,
        'quality': str,
        'qual_ints': check_np_type('int64'),
        'error': check_np_type('float64'),
        'description': str
    })

    #get_object = _id
    index = ['id']
    columns = ('id', 'seq', 'quality', 'description', 'qual_ints', 'error')
    SANGER = True
    # Per-column extractors.  NOTE(review): nothing below references
    # these local names directly -- get_row looks "get_<column>" up on
    # the module object instead; confirm that is intended.
    get_id = attr('id')
    get_seq = compose(str, attr('seq'))
    get_qual_ints = compose_all(np.array, itemgetter('phred_quality'),
                                attr('_per_letter_annotations'))
    get_description = attr('description')
    get_quality = SeqIO.QualityIO._get_sanger_quality_str
    get_error = compose(error, get_qual_ints)

    #get_error = error_from_ints(get_qual_ints)

    def get_row(record):
        """Return the validated {column: value} dict for one record."""
        #record = next(fileh)
        print(get_funcs())
        import sys
        # The module this code lives in; getters are resolved on it by
        # name.
        __module__ = sys.modules[__name__]
        get_getter = compose(attr, "get_{0}".format)
        _getters = map(get_getter, columns)
        self_getters = apply_each(
            _getters, __module__)  #fzip(_getters, repeat(__module__, clen))
        results = apply_each(self_getters, record)
        final_dict = dict(zip(columns, results))
        final_schema.validate(final_dict)
        return final_dict

    def load_fastq():
        """Parse fileh and return a DataFrame of rows indexed by 'id'."""
        fq = get_fastq(fileh)
        dicts = map(get_row, fq)
        return pd.DataFrame(dicts).set_index(
            index)  #, index=index, columns=columns)

    return namedtuple('FastqFrame', ['get_row', 'load_fastq'])(
        get_row, load_fastq)  #{'get_row' : get_row, 'load_fastq' : load_fastq}
コード例 #7
0
def fqframe(fileh):
    """Create the FASTQ row builder and loader for a file handle.

    fileh -- FASTQ file handle; load_fastq passes it to get_fastq().

    Returns a namedtuple 'FastqFrame' with the fields get_row (turns one
    record into a validated row dict) and load_fastq (reads every record
    into a DataFrame whose index is the 'id' column).
    """
    # Schema used to validate each row dict.
    final_schema =  Schema({
        'id' : str,
        'seq' : str,
        'quality' : str,
        'qual_ints' : check_np_type('int64'),
        'error' : check_np_type('float64'),
        'description' : str
    })

    #get_object = _id
    index = ['id']
    columns = ('id', 'seq', 'quality', 'description', 'qual_ints', 'error')
    SANGER = True
    # One extractor per column.  NOTE(review): get_row below resolves
    # "get_<column>" on the module object, not on these locals -- verify
    # that same-named module-level functions exist.
    get_id = attr('id')
    get_seq= compose(str, attr('seq'))
    get_qual_ints = compose_all(np.array, itemgetter('phred_quality'), attr('_per_letter_annotations'))
    get_description = attr('description')
    get_quality = SeqIO.QualityIO._get_sanger_quality_str
    get_error = compose(error, get_qual_ints)
    #get_error = error_from_ints(get_qual_ints)

    def get_row(record):
        """Build the {column: value} dict for record and validate it."""
        #record = next(fileh)
        print(get_funcs())
        import sys
        # Module object on which the getters are looked up by name.
        __module__ = sys.modules[__name__]
        get_getter = compose(attr, "get_{0}".format)
        _getters = map(get_getter, columns)
        self_getters = apply_each(_getters, __module__) #fzip(_getters, repeat(__module__, clen))
        results = apply_each(self_getters, record)
        final_dict = dict(zip(columns, results))
        final_schema.validate(final_dict)
        return final_dict

    def load_fastq():
        """Read all records of fileh into a DataFrame indexed by 'id'."""
        fq = get_fastq(fileh)
        dicts = map(get_row, fq)
        return pd.DataFrame(dicts).set_index(index) #, index=index, columns=columns)

    return namedtuple('FastqFrame', ['get_row', 'load_fastq'])(get_row, load_fastq)#{'get_row' : get_row, 'load_fastq' : load_fastq}
コード例 #8
0
 def get_row(record):
     """Return the validated {column: value} dict for one record.

     For every name in columns the attribute "get_<name>" is looked up on
     the current module and applied to record (apply_each presumably
     applies each callable of its first argument to its second).  The
     resulting dict is checked with final_schema.validate() before it is
     returned.
     """
     #record = next(fileh)
     print(get_funcs())
     import sys
     this_module = sys.modules[__name__]
     lookup_for = compose(attr, "get_{0}".format)
     lookups = [lookup_for(column) for column in columns]
     bound_getters = apply_each(lookups, this_module)
     row = dict(zip(columns, apply_each(bound_getters, record)))
     final_schema.validate(row)
     return row
コード例 #9
0
ファイル: sequenceframes.py プロジェクト: demis001/biopandas
 def get_row(record):
     """Turn one record into a schema-checked row dict.

     The getter for each entry of columns is the module attribute named
     "get_<column>"; the values it yields for record are zipped with
     columns into a dict, which is validated against final_schema and
     returned.
     """
     #record = next(fileh)
     print(get_funcs())
     import sys
     module_obj = sys.modules[__name__]
     to_lookup = compose(attr, "get_{0}".format)
     attribute_lookups = [to_lookup(name) for name in columns]
     getters_for_record = apply_each(attribute_lookups, module_obj)
     values = apply_each(getters_for_record, record)
     row = dict(zip(columns, values))
     final_schema.validate(row)
     return row
コード例 #10
0
ファイル: samframe.py プロジェクト: VDBWRAIR/bioframes
from operator import itemgetter
from functools import partial
import operator as op
from operator import add, div
from schema import Schema, Use
from itertools import ifilter
# Parse options
#from pyparsing import Regex

# np.array with dtype fixed to int.
to_np_int = partial(np.array, dtype=int)
# NOTE(review): presumably strips '[' and ']', splits on ',' and converts
# the pieces to an int array -- depends on pstrip/psplit/compose_all,
# which are defined elsewhere.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]'))
tabsplit = psplit('\t')


# add(-33, x): subtracts the offset 33 from a character code.
minus33 = partial(add, -33)
qual_int_sanger = compose(minus33, ord)
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# NOTE(review): looks like 10 ** (phred / -10.0), assuming partial2 fixes
# the second argument of div -- confirm against partial2.
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
sanger_qual_str_to_error = compose(error, qual_to_phreds)

basic_scheme={
    'QNAME' : str,
    'FLAG' : int,
    'RNAME' : str,
    'POS' : int,
    'MAPQ' : int,
    'CIGAR' : str,
    'RNEXT' : str,
    'PNEXT' : int,
コード例 #11
0
ファイル: utils.py プロジェクト: averagehat/biolearn
from operator import methodcaller as mc, ne
import numpy as np
import networkx as nx
from matplotlib import pyplot as plt


def to_adj_list(G, edgekey='weight', bothways=True, as_float=False):
    '''Format the edges of a graph as adjacency-list strings "u->v:w".

    Self-loops (u == v) are dropped and the remaining edges are emitted
    in sorted order.

    G        -- graph object whose edges(data=True) yields
                (u, v, attribute_dict) tuples.
    edgekey  -- name of the edge attribute printed as the weight.
    bothways -- for integer output, also emit the reversed edge
                ("v->u:w") on a second line of the same string.
    as_float -- print weights as floats with three decimals.  In this
                mode both directions are always emitted, regardless of
                bothways.

    Returns a lazy iterator of formatted strings.  Raises KeyError if an
    edge has no edgekey attribute.
    '''
    edges = sorted(e for e in G.edges(data=True) if not e[0] == e[1])
    # Honour edgekey: the attribute name used to be hard-coded to
    # 'weight', so passing a different edgekey silently had no effect.
    cast = float if as_float else int
    res = [(e[0], e[1], cast(e[-1][edgekey])) for e in edges]
    if as_float:
        form = "{0}->{1}:{2:.3f}\n{1}->{0}:{2:.3f}".format
    elif bothways:
        form = "{0}->{1}:{2}\n{1}->{0}:{2}".format
    else:
        form = "{0}->{1}:{2}".format
    return starmap(form, res)
# Adjacency list as one newline-joined string (compose is defined
# elsewhere; presumably compose(f, g)(*a) == f(g(*a))).
adj_str = compose('\n'.join, to_adj_list)

def parseM(raw):
    '''Parse and return a whitespace-separated matrix.

    raw -- text with one matrix row per line; entries within a row are
           separated by whitespace.  Empty lines are skipped.

    Returns an np.matrix of floats.  Raises ValueError if an entry cannot
    be converted to float.
    '''
    # Plain list comprehensions instead of nested map/filter calls, so
    # np.matrix always receives real lists (also when map/filter are
    # lazy) and no helper from outside this function is required.
    rows = [[float(entry) for entry in line.split()]
            for line in raw.split('\n') if line]
    return np.matrix(rows)

def quantify(iterable, pred=bool):
    '''Count how many times the predicate is true.

    iterable -- items to test.
    pred     -- one-argument callable; its (boolean or integer) results
                are summed.  Defaults to bool, i.e. counts truthy items.

    Adapted from the itertools recipes:
    https://docs.python.org/2/library/itertools.html#recipes
    '''
    # A generator expression instead of itertools.imap: same lazy
    # evaluation, but it also works where imap does not exist.
    return sum(pred(item) for item in iterable)

def drawgraph(G, edgekey='weight', big=False, **kwargs):
    if big: fig = plt.figure(figsize = (15, 10))
    pos=nx.spring_layout(G)
    nx.draw_networkx(G, pos=pos, **kwargs)
コード例 #12
0
ファイル: dist.py プロジェクト: averagehat/biolearn
from func import compose, partial2
import numpy as np
from functools import partial
import operator as op
from itertools import islice, ifilter
from fn.iters import splitby
from operator import methodcaller
from func import compose_all, pmap, psplit, _not


# np.linalg.norm taken along axis 1.
norm_matrix = partial(np.linalg.norm, axis=1)
# NOTE(review): assuming compose(f, g)(*a) == f(g(*a)), this is the
# axis-1 norm of (a - b) -- confirm against func.compose.
dist_matrix = compose(norm_matrix, op.sub)
# np.argmin of dist_matrix's result (same assumption about compose).
min_dist_position = compose(np.argmin, dist_matrix)

def gravity(M):
    '''Return the sum of M over axis 0 divided by len(M).'''
    column_totals = M.sum(axis=0)
    row_count = float(len(M))
    return column_totals / row_count

def k_means_cluster(data, centers=None, k=None):
    '''Perform one assign-and-update step of k-means clustering.

    data    -- 2-D array with one point per row.
    centers -- optional array of initial centers.  NOTE: when given, it
               is overwritten in place with the updated centers.
    k       -- number of clusters; only used when centers is None, in
               which case the first k rows of data are the initial
               centers.

    At least one of centers and k must be given (checked by assert).

    Returns (centers, data_by_cluster) when the updated centers are
    np.allclose to the previous ones; data_by_cluster holds, for every
    row of data, the value of min_dist_position(centers, row)
    (presumably the index of the nearest center).
    NOTE(review): the visible code has no branch for the non-converged
    case, so the function then falls through and returns None -- confirm
    whether an iteration or recursion step is missing here.
    '''
    assert (centers is not None or k is not None)
    if centers is None:
        centers = np.empty((k, data.shape[1]))
        centers[:] = data[:k]
    # Snapshot for the convergence check, taken before the update below.
    old_centers = centers.copy()

    mincenters = partial(min_dist_position, centers)
    # Applied to every row of data (axis 1).
    data_by_cluster = np.apply_along_axis(mincenters, 1, data)
    for i, _ in enumerate(centers):
        cluster = data[data_by_cluster == i]
        # NOTE(review): gravity() divides by len(cluster); an empty
        # cluster is not handled here.
        centers[i] = gravity(cluster)
    if np.allclose(old_centers, centers):
        return centers, data_by_cluster
コード例 #13
0
ファイル: bioframes.py プロジェクト: averagehat/biopandas
def col_compare(df, col, value, comp):
    """Compare a fixed value against one column of df.

    df    -- any object supporting df[col] (e.g. a DataFrame or a dict).
    col   -- key of the column to compare.
    value -- value to compare with.
    comp  -- two-argument callable, e.g. operator.eq.

    Returns comp(value, df[col]).  Note the operand order: value is the
    LEFT operand, so with operator.lt the result is ``value < df[col]``.
    """
    # Direct call instead of building partial(comp, value) and composing
    # it with df.__getitem__ only to invoke the result once.
    return comp(value, df[col])
コード例 #14
0
ファイル: bioframes.py プロジェクト: averagehat/biopandas
Join fastq and SAM (merge on QNAME [and SEQ])
Join VCF and SAM (merge on POS)
Pileup
Join VCF and Pileup
'''

'''
pcompose = partial(partial, compose)
error_from_ints = pcompose(error)
#sanger_qual_str_to_error = cmperror(qual_to_phreds)

'''
# SeqIO.parse with the format argument fixed.
get_fastq = partial(SeqIO.parse, format='fastq')
get_fasta = partial(SeqIO.parse, format='fasta')
# np.array with dtype fixed to int.
to_np_int = partial(np.array, dtype=int)
# NOTE(review): presumably counts the characters that are 'G' or 'C' --
# depends on ilen/pifilter defined elsewhere.
gccontent = compose(ilen, pifilter('GC'.__contains__))

# add(-33, x): subtracts the offset 33 from a character code.
minus33 = partial(add, -33)
qual_int_sanger = compose(minus33, ord)

''' Error = 10^-(Phred/10) '''
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# NOTE(review): assumes partial2 fixes the second argument of div, giving
# pow(10, phred / -10.0) -- confirm against partial2.
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
sanger_qual_str_to_error = compose(error, qual_to_phreds)




#SANGER_OFFSET = 33
コード例 #15
0
'''
Join fastq and SAM (merge on QNAME [and SEQ])
Join VCF and SAM (merge on POS)
Pileup
Join VCF and Pileup
'''
'''
pcompose = partial(partial, compose)
error_from_ints = pcompose(error)
#sanger_qual_str_to_error = cmperror(qual_to_phreds)

'''
# SeqIO.parse preconfigured for each file format.
get_fastq = partial(SeqIO.parse, format='fastq')
get_fasta = partial(SeqIO.parse, format='fasta')
# Integer numpy array constructor.
to_np_int = partial(np.array, dtype=int)
# NOTE(review): looks like a count of 'G'/'C' characters; ilen and
# pifilter are defined elsewhere -- verify.
gccontent = compose(ilen, pifilter('GC'.__contains__))

# Character code minus 33 (add(-33, ord(c))).
minus33 = partial(add, -33)
qual_int_sanger = compose(minus33, ord)
''' Error = 10^-(Phred/10) '''
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# NOTE(review): presumably pow(10, phred / -10.0), provided partial2
# binds the second argument of div -- confirm.
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
sanger_qual_str_to_error = compose(error, qual_to_phreds)

#SANGER_OFFSET = 33
'''
assert len(quality) == len(error) == len(phred_scores)
'''
コード例 #16
0
ファイル: samframe.py プロジェクト: demis001/biopandas
0x1  : "template having multiple segments in sequencing",
0x2  : "each segment properly aligned according to the aligner",
0x4  : "segment unmapped",
0x8  : "next segment in the template unmapped",
0x10 : "SEQ being reverse complemented",
0x20 : "SEQ of the next segment in the template being reversed",
0x40 : "the rst segment in the template",
0x80 : "the last segment in the template",
0x100: "secondary alignment",
0x200: "not passing quality controls",
0x400: "PCR or optical duplicate",
0x800: "supplementary alignment"
}


# NOTE(review): presumably eval_flag(bit, flag) == bool(bit & flag),
# assuming compose(f, g)(*a) == f(g(*a)) -- confirm.
eval_flag = compose(bool, op.and_)

def flag_dict(flag):
    """Map every meaning in flag_meanings to eval_flag(bit, flag)."""
    decoded = {}
    for bit, meaning in flag_meanings.items():
        decoded[meaning] = eval_flag(bit, flag)
    return decoded
def split_list(A, idx):
    """Split the sequence A at position idx; return (A[:idx], A[idx:])."""
    head = A[:idx]
    tail = A[idx:]
    return head, tail

# Names of the SAM columns handled here, in order.
sam_columns = ("QNAME", "FLAG", "RNAME", "POS", "MAPQ", "CIGAR", "RNEXT", "PNEXT", "TLEN", "SEQ", "QUAL") #options


#TODO: get_record function takes a filehandle and returns a single record via SeqIO, etc.
#So functions expect a dictionary I guess
#pass
# NOTE(review): presumably builds a dict from parse_option applied to
# every option -- pmap/parse_option are defined elsewhere.
parse_options = compose(dict, pmap(parse_option)) #, tabsplit)
#readfields = compose(tabsplit, next)
# Pairs the given field values with sam_columns and builds a dict from
# them (zip stops at the shorter of the two sequences).
line_to_dict = compose_all(dict, partial(zip, sam_columns)) #, tabsplit)
コード例 #17
0
ファイル: assembly.py プロジェクト: averagehat/biolearn
    nx.draw_networkx(G, pos=pos, **kwargs)
    if edgekey:
        edge_labels=dict([((u,v,),d.get(edgekey, ''))
                         for u,v,d in G.edges(data=True)])
        nx.draw_networkx_edge_labels(G,pos,edge_labels=edge_labels)#, **kwargs)
    plt.show()
#NOTE: requires graphviz
    #nx.write_dot(G,'graph.dot')
#dot -Tpng graph.dot > graph.png

def info_fromkmer(kmer):
    '''Return (kmer[:-1], kmer[1:], {'kmer': kmer}) for a k-mer.'''
    prefix = kmer[:-1]
    suffix = kmer[1:]
    attributes = {'kmer' : kmer}
    return prefix, suffix, attributes


# NOTE(review): presumably maps info_fromkmer over the windows produced
# by slider(s, k) -- slider, F and compose are defined elsewhere.
yield_pathgraph = compose(F(imap, info_fromkmer), slider)
#pathlist = compose(list, yield_pathgraph)
#use reduce
def make_debruijn(s, k=None):
    '''Build a networkx MultiDiGraph from k-mers.

    s -- when k is falsy: an iterable of k-mers, each turned into an edge
         by info_fromkmer; otherwise the input handed to
         yield_pathgraph(s, k).
    k -- optional second argument for yield_pathgraph.

    Returns the populated nx.MultiDiGraph.
    '''
    G = nx.MultiDiGraph()
    if k:
        # build straight from string
        edge_source = yield_pathgraph(s, k)
    else:
        edge_source = imap(info_fromkmer, s)
    G.add_edges_from(edge_source)
    return G



'''
set v to some random node.
コード例 #18
0
def col_compare(df, col, value, comp):
    """Return comp(value, df[col]).

    df    -- object indexable by col (DataFrame, dict, ...).
    col   -- column key.
    value -- fixed value, passed to comp as its FIRST argument.
    comp  -- binary callable such as operator.eq or operator.lt.

    Because value comes first, operator.lt yields ``value < df[col]``.
    """
    # Simplified: the former partial/compose pipeline was built and
    # called exactly once, which is the same as this direct call.
    return comp(value, df[col])
コード例 #19
0
ファイル: sopt.py プロジェクト: chrinide/pts
def test(A, B, trafo=None):
    """Optimize a path between A and B on the MB surface with a growing
    number of beads.

    A, B  -- end points of the path.
    trafo -- optional coordinate transformation; when given, the surface
             is composed with it and A, B are mapped through trafo.pinv.

    Starting with 3 beads, every pass respaces the path, runs soptimize()
    and saves the result to "mb-path.txt-<number of points>".  The bead
    count is then set to 2 * n + 1 until it is no longer below n_max.

    array, linspace, soptimize and tangent4 are not imported here and are
    expected to be available at module level.
    """

    print "A=", A
    print "B=", B

    from pts.pes.mueller_brown import MB
    from pts.pes.mueller_brown import show_chain

    x = [A, B]

    # change coordinates:
    if trafo is not None:
        from func import compose
        MB = compose(MB, trafo)
        x = array(map(trafo.pinv, x))

    def show(x):
        """Pass the chain x to show_chain, mapped through trafo if set."""
        if trafo is not None:
            show_chain(map(trafo, x))
        else:
            show_chain(x)

    from numpy import savetxt

    def callback(x, e, g, t):
        """No-op callback handed to soptimize()."""
        # savetxt("path.txt", x)
        # print "chain spacing=", spacing(x)
        pass

    from path import MetricPath  # for respacing
    from rc import Linear  # as reaction coordinate
    rcoord = Linear([1., -1.])

    from metric import Metric
    mt = Metric(rcoord)

    n = 3
    n_max = 30
    while True:
        #
        # Respace vertices based on custom metric built from the
        # definition of reaction coordinate:
        #
        p = MetricPath(x, mt.norm_up)
        x = array(map(p, linspace(0., 1., n)))

        print "BEFORE, rc(x)=", map(rcoord, x)
        show(x)

        # x = respace(x, tangent4, spacing)

        # print "RESPACE, x=", x
        # print "spacing(x)=", spacing(x)
        # show(x)

        #       x, stats = soptimize(MB, x, tangent1, spacing, maxit=20, maxstep=0.1, callback=callback)
        #       x, stats = soptimize(MB, x, tangent4, maxit=20, maxstep=0.1, callback=callback)
        x, stats = soptimize(MB,
                             x,
                             tangent4,
                             rc=rcoord,
                             maxit=20,
                             maxstep=0.1,
                             callback=callback)
        # One output file per bead count.
        savetxt("mb-path.txt-" + str(len(x)), x)

        print "AFTER, rc(x)=", map(rcoord, x)
        show(x)

        if n < n_max:
            # double the number of beads:
            n = 2 * n + 1
        else:
            print "========================================================="
            print "Conveged for the maximal tested number of beads: ", n
            print "========================================================="
            break
コード例 #20
0
    'bats', 'beagle-lib', 'beast/beast', 'beast/BEASTv1.8.0', 'bio_pieces',
    'blast/blast-2.2.30+', 'bowtie/bowtie-2.2.5', 'bwa/bwa-0.7.12-r1044',
    'cuda/cuda', 'cuda/cuda_6.5.14', 'igv/igv-2.3.37', 'igv/igv-2.3.52',
    'mrsnbactpipeline', 'ngs_mapper/ngs_mapper-1.1',
    'ngs_mapper/ngs_mapper-1.2', 'pathdiscov/pathdiscov-4.2', 'pypbs',
    'usamriidPathDiscov', 'vdbstatus', 'ray/ray-2.3.1', 'roche/analysis',
    'roche/analysis-v2.9', 'roche/gsprocessor-v2.9', 'samtools/samtools-1.1'
]

# Shell script text for a job that runs runsample.py.
# NOTE(review): {REFPATH} and {SAMPLENAME} look like str.format
# placeholders (so "${SAMPLENAME}" would become "$" + the sample name) --
# confirm where this template is filled in.
ngs_mapper_cmd = '''
cd $PBS_O_WORKDIR
mkdir -p $(pwd)/tmp
SAMPLEDIR=/media/VD_Research/NGSData/ReadsBySample/${SAMPLENAME}
TMPDIR=$(pwd)/tmp runsample.py $SAMPLEDIR {REFPATH} {SAMPLENAME} -od {SAMPLENAME}
'''
# NOTE(review): presumably os.path.realpath(os.path.expanduser(path)),
# assuming compose(f, g)(x) == f(g(x)).
expand_path = compose(os.path.realpath, os.path.expanduser)
''' Tab completion for directories '''


def glob_complete(text, state):
    """Completer function for readline based on file-name globbing.

    text  -- the text typed so far; it is passed through expand_path, and
             a '/' is appended when the result is a directory.
    state -- index of the requested candidate.

    Returns the state-th path matching "<expanded text>*"; the entry
    following the last match is None.
    """
    base = expand_path(text)
    if os.path.isdir(base):
        base += '/'
    candidates = glob(base + '*')
    candidates.append(None)
    return candidates[state]


# Module-level side effects: configure readline so that TAB completes
# through glob_complete; words are delimited by space, tab, newline
# and ';'.
readline.set_completer_delims(' \t\n;')
readline.parse_and_bind("tab: complete")
readline.set_completer(glob_complete)

# NOTE(review): presumably prompt(name) == raw_input("<name>>"), assuming
# compose(f, g)(x) == f(g(x)).
prompt = compose(raw_input, "{0}>".format)
コード例 #21
0
ファイル: custom.py プロジェクト: VDBWRAIR/pbs
'pypbs',
'usamriidPathDiscov',
'vdbstatus',
'ray/ray-2.3.1',
'roche/analysis',
'roche/analysis-v2.9',
'roche/gsprocessor-v2.9',
'samtools/samtools-1.1']

# Text of the shell script that runs runsample.py for one sample.
# NOTE(review): {REFPATH} / {SAMPLENAME} appear to be str.format
# placeholders -- verify against the code that fills this template.
ngs_mapper_cmd = '''
cd $PBS_O_WORKDIR
mkdir -p $(pwd)/tmp
SAMPLEDIR=/media/VD_Research/NGSData/ReadsBySample/${SAMPLENAME}
TMPDIR=$(pwd)/tmp runsample.py $SAMPLEDIR {REFPATH} {SAMPLENAME} -od {SAMPLENAME}
'''
# NOTE(review): presumably expands "~" first and then resolves the real
# path, assuming compose(f, g)(x) == f(g(x)).
expand_path = compose(os.path.realpath, os.path.expanduser)

''' Tab completion for directories '''
def glob_complete(text, state):
    """Return the state-th glob match for text, for use with readline.

    text is expanded with expand_path; if that names a directory, a '/'
    is appended so that the directory's entries are matched.  The list of
    matches is terminated by None.
    """
    prefix = expand_path(text)
    if os.path.isdir(prefix):
        prefix = prefix + '/'
    matches = glob(prefix + '*')
    matches.append(None)
    return matches[state]

# Module-level side effects: install glob_complete as the readline
# completer, bound to TAB, with space, tab, newline and ';' as word
# delimiters.
readline.set_completer_delims(' \t\n;')
readline.parse_and_bind("tab: complete")
readline.set_completer(glob_complete)

# NOTE(review): presumably asks the user via raw_input("<name>>"),
# assuming compose(f, g)(x) == f(g(x)).
prompt = compose(raw_input, "{0}>".format)
def getvar(varname):
    """Return the environment variable varname, or prompt for it.

    prompt(varname) is used when the variable is unset or empty.
    """
    from_env = os.environ.get(varname, None)
    if from_env:
        return from_env
    return prompt(varname)
コード例 #22
0
ファイル: testleaves.py プロジェクト: averagehat/biolearn
    other_rows = range(0, j) + range(j+1, D.shape[0])
    iks = get_products(other_rows)
    j_parent = partial(parent_len, D, j)
    return min(starmap(j_parent, iks))


from numpy import nan
def test_add_phylo_2D():
      '''Check additive_phylo(_in, 2) against a fixed adjacency listing.'''
      # 3x3 distance matrix; the distance between nodes 1 and 2 is nan.
      _in = np.array([ [0, 3, 5  ], [3, 0, nan], [5, nan, 0] ])
      # NOTE(review): the literal below contains the source indentation
      # of its continuation lines and a trailing newline -- confirm that
      # additive_phylo really returns exactly this text.
      expected='''0->1:3
      0->2:5
      '''
      actual = additive_phylo(_in, 2)
      assert expected == actual

# NOTE(review): presumably filterfst(pred, seq) == next(ifilter(pred, seq)),
# i.e. the first matching item, raising StopIteration when there is
# none -- assumes compose(f, g)(*a) == f(g(*a)).
filterfst = compose(next, ifilter)
def str_row(D, j):
    '''Format row j of D as lines "j->column:value" joined by newlines.'''
    cells = D[j]
    template = str(j) + "->{0}:{1}"
    return '\n'.join(
        template.format(column, value) for column, value in enumerate(cells))

def str_matrix(D):
    '''Return str_row(D, j) for every row index j of D, newline-joined.

    The number of rows is taken from D.shape[0].
    '''
    row_texts = [str_row(D, j) for j in xrange(D.shape[0])]
    return '\n'.join(row_texts)

# method that gets node with matching distance
def get_match_dst(D, j, dist):
    '''Return the first column of row j of D whose value equals dist.

    dist must be non-zero (checked by assert).  Because argmax is taken
    over a boolean array, 0 is returned when no entry matches.
    '''
    assert dist != 0
    matches = D[j] == dist
    return matches.argmax()
    #return D[i, (D[i] == dist)]
コード例 #23
0
ファイル: numtry.py プロジェクト: averagehat/biolearn
from functools import partial
import numpy as np
from func import compose

#next argument is the size
# Random prize doors: integers drawn from 0, 1, 2 (the upper bound 3 of
# np.random.randint is exclusive).
simulate_prizedoor = partial(np.random.randint, 0, 3)
# np.apply_along_axis(np.random.choice, 1, arr); not used in the code
# visible here.
random_col_vals = partial(np.apply_along_axis, np.random.choice, 1)
# Guesses are an array of ones, i.e. door 1 is always guessed.
simulate_guess = np.ones
# NOTE(review): presumably a random choice among the unmasked entries of
# a masked array, assuming compose(f, g)(x) == f(g(x)).
rowchoice = compose(np.random.choice, np.ma.compressed)
# Number of simulated games.
RUNS = 1000


def goat_doors(pzs, gss):
    """Pick, for every game, a door that is in neither pzs nor gss.

    pzs, gss -- arrays with one door number per game (RUNS entries each).

    A masked 3 x RUNS grid is built whose row r holds the door number r;
    the entries equal to pzs or gss are masked, and rowchoice is applied
    to every column.  Returns the results as an array.
    """
    doors = np.repeat(np.ma.arange(3), RUNS).reshape(3, RUNS)
    doors.mask = (doors == pzs) | (doors == gss)
    return np.array([rowchoice(game) for game in doors.T])

# Switching uses the same selection as goat_doors: called with the
# guesses and the goat doors, it picks a door that is neither of them.
switch_guess = goat_doors

def win_percentage(pzs, gss):
    """Return the percentage of positions at which pzs equals gss."""
    #return (pzs == gss).sum()/float(len(gss))
    hits = pzs == gss
    return 100 * hits.mean()

def sim_game(switch=False):
    """Simulate RUNS games and return the win percentage.

    switch -- when true, every guess is replaced by
              switch_guess(guess, goat door); otherwise the original
              guesses are kept.
    """
    prizes = simulate_prizedoor(RUNS)
    guesses = simulate_guess(RUNS)
    revealed = goat_doors(prizes, guesses)
    if switch:
        final_picks = switch_guess(guesses, revealed)
    else:
        final_picks = guesses
    return win_percentage(final_picks, prizes)

# Module-level side effect: runs one simulation with switching enabled
# and prints the resulting win percentage.
print sim_game(True)
コード例 #24
0
ファイル: sopt.py プロジェクト: alexei-matveev/pts
def test(A, B, trafo=None):
    """Run soptimize() on a path from A to B on the MB surface, for an
    increasing number of beads.

    A, B  -- end points of the path.
    trafo -- optional coordinate transformation.  If given, MB is
             composed with it and the end points are converted with
             trafo.pinv.

    The loop starts with n = 3 beads.  Each pass respaces the path with a
    MetricPath, optimizes it and writes the result to
    "mb-path.txt-<number of points>"; n then becomes 2 * n + 1 as long as
    it is below n_max.

    array, linspace, soptimize and tangent4 are not imported in this
    function and must be provided at module level.
    """

    print "A=", A
    print "B=", B

    from pts.pes.mueller_brown import MB
    from pts.pes.mueller_brown import show_chain

    x = [A, B]

    # change coordinates:
    if trafo is not None:
        from func import compose
        MB = compose(MB, trafo)
        x = array(map(trafo.pinv, x))

    def show(x):
        """Display the chain x, mapped through trafo when it is set."""
        if trafo is not None:
            show_chain(map(trafo, x))
        else:
            show_chain(x)

    from numpy import savetxt

    def callback(x, e, g, t):
        """Callback for soptimize(); currently does nothing."""
        # savetxt("path.txt", x)
        # print "chain spacing=", spacing(x)
        pass

    from path import MetricPath # for respacing
    from rc import Linear # as reaction coordinate
    rcoord = Linear([1., -1.])

    from metric import Metric
    mt = Metric(rcoord)

    n = 3
    n_max = 30
    while True:
        #
        # Respace vertices based on custom metric built from the
        # definition of reaction coordinate:
        #
        p = MetricPath(x, mt.norm_up)
        x = array(map(p, linspace(0., 1., n)))

        print "BEFORE, rc(x)=", map(rcoord, x)
        show(x)

        # x = respace(x, tangent4, spacing)

        # print "RESPACE, x=", x
        # print "spacing(x)=", spacing(x)
        # show(x)

#       x, stats = soptimize(MB, x, tangent1, spacing, maxit=20, maxstep=0.1, callback=callback)
#       x, stats = soptimize(MB, x, tangent4, maxit=20, maxstep=0.1, callback=callback)
        x, stats = soptimize(MB, x, tangent4, rc=rcoord, maxit=20, maxstep=0.1, callback=callback)
        # The file name carries the number of points of the result.
        savetxt("mb-path.txt-" + str(len(x)), x)

        print "AFTER, rc(x)=", map(rcoord, x)
        show(x)

        if n < n_max:
            # double the number of beads:
            n = 2 * n + 1
        else:
            print "========================================================="
            print "Conveged for the maximal tested number of beads: ", n
            print "========================================================="
            break
コード例 #25
0
ファイル: scrach.py プロジェクト: averagehat/biolearn
from fn import _, F
from fn.iters import take, accumulate
from utils import slider
from assembly import drawgraph
from numpy import nan

def to_adj_list(G, edgekey='weight', bothways=True, as_float=False):
    '''Turn the edges of G into adjacency-list strings of the form "u->v:w".

    Edges from a node to itself are skipped; the others are processed in
    sorted order.

    G        -- graph whose edges(data=True) yields
                (u, v, attribute_dict) tuples.
    edgekey  -- edge attribute that supplies the printed weight.
    bothways -- with integer weights, add the reverse direction
                ("v->u:w") as a second line of each string.
    as_float -- format weights as floats with three decimals; both
                directions are then always included, whatever bothways
                says.

    Returns a lazy iterator over the strings.  A KeyError is raised when
    an edge lacks the edgekey attribute.
    '''
    kept = sorted(edge for edge in G.edges(data=True)
                  if not edge[0] == edge[1])
    convert = float if as_float else int
    # The weight is read from edgekey; previously 'weight' was hard-coded
    # and the edgekey argument was silently ignored.
    triples = [(edge[0], edge[1], convert(edge[-1][edgekey]))
               for edge in kept]
    if as_float:
        form = "{0}->{1}:{2:.3f}\n{1}->{0}:{2:.3f}".format
    elif bothways:
        form = "{0}->{1}:{2}\n{1}->{0}:{2}".format
    else:
        form = "{0}->{1}:{2}".format
    return starmap(form, triples)
# Joins the strings produced by to_adj_list with newlines (compose is
# defined elsewhere; presumably compose(f, g)(*a) == f(g(*a))).
adj_str = compose('\n'.join, to_adj_list)

def fst_or_none(func, seq):
    '''Return the first item of seq for which func is true, else None.

    func -- one-argument predicate; None means "item is truthy", as with
            the built-in filter.
    seq  -- any iterable.
    '''
    pred = bool if func is None else func
    # Stop at the first match instead of filtering the whole sequence
    # and indexing the result (which also required filter() to return a
    # list).
    return next((item for item in seq if pred(item)), None)
# NOTE(review): presumably next(ifilter(pred, seq)) -- unlike
# fst_or_none this would raise StopIteration when nothing matches;
# assumes compose(f, g)(*a) == f(g(*a)).
filterfst = compose(next, ifilter)

def nondiag(D, i):
    '''Return a list of all indices 0..len(D)-1 except i.'''
    all_indices = set(range(len(D)))
    return list(all_indices - set([i]))
# NOTE(review): presumably get_products(nondiag(D, i)); get_products is
# defined elsewhere.
ndiag_perms = compose(get_products, nondiag)

def limbmatch(D, n):
    ''' find nodes i and k such that they satisfie the linear equation:
        D_ik = D_in + D_nk'''
    #print np.isnan(D[n]).all()
    def match(tup):
        i, k = tup
コード例 #26
0
ファイル: sam.py プロジェクト: demis001/biopandas
{
    'A' : chr,
    'i' : int,
    'f' : float,
    'Z' : str,
    'H' : int, # hex
    'B' : parse_array
}

#parse cigar string
# Each match is one (count, operation) pair; the lazy "+?" keeps a match
# from spanning several pairs.
cigar_regex = r'(?:([0-9]+)([MIDNSHPX=]))+?'
reg = re.compile(cigar_regex)
# Hard-coded sample input.
tups = reg.findall('15S213M23S')
# key selects the operation letter, value the count string.
key,value = itemgetter(1), itemgetter(0)
# groupby needs its input sorted by the same key.
groups = groupby(sorted(tups, key=key), key)
# NOTE(review): presumably converts the count of every pair to int and
# sums them -- pmap/compose are defined elsewhere.
get_counts = pmap(compose(int, itemgetter(0)))
sum_counts = compose(sum, get_counts)
cigar_dict = dict( (name, sum_counts(nums)) for name, nums in groups)
# Sum of the totals of every operation other than 'M' and '='.  The test
# is a substring check against the string 'M='.
mismatches = sum(num for key, num in cigar_dict.items() if key not in 'M=')

#dictmap(compose(sum, get_counts), dict(groups))
#sum(starmap(to_cigar, tups))

#dict(map(reverse, tups))
''' assert sum(itemgetter('M', 'I', 'S', '=', 'X')) == len(seq) == len(quality), \
    "cigar string M/I/S/=/X should sum to the length of the query sequence." '''

#TODO: parse flag
#TODO: handle empty cases (unmapped reads, *)

# Column names; not used in the code visible here.
index = ['QNAME', 'POS', 'REF']