예제 #1
0
파일: sopt.py 프로젝트: alexei-matveev/pts
def test1(n):
    from numpy import array

    # One   equilibrium  of   Ar4  LJ   cluster  (in   coordinates  of
    # c2v_tetrahedron1 Func):
    w = 0.39685026
    A = array([w, w, +w])

    # Another equilibrium:
    B = array([w, w, -w])

    # Halfway between A and B:
    C = (A + B) / 2.0
    C = array([w + 0.01, w - 0.01, 0.0])

    xs = array([A, C, B])

    from test.testfuns import c2v_tetrahedron1, diagsandhight
    from path import MetricPath
    from metric import Metric
    from numpy import linspace

    z = c2v_tetrahedron1()

    # z = diagsandhight()
    # r = 1.12246195815
    # A = array([r, r,  r / sqrt(2.)])
    # B = array([r, r, -r / sqrt(2.)])
    # C = array([r, r * sqrt(2.), 0.])
    # xs = array([A, C, B])

    p = MetricPath(xs, Metric(z).norm_up)

    x0 = map(p, linspace(0., 1., n))

    from ase import Atoms
    from qfunc import QFunc
    from func import compose

    pes = compose(QFunc(Atoms("Ar4")), z)

    from rc import Volume

    vol = compose(Volume(), z)

    def callback(x, e, g, t):
        # from pts.tools.jmol import jmol_view_path
        print "energies=", e # map(pes, x)
        print "volume=", map(vol, x)
        # jmol_view_path(map(z, x), syms=["Ar"]*4, refine=1)
        pass

    print "BEFORE:"
    callback(x0, map(pes, x0), map(pes.fprime, x0), None)

    x1, info = soptimize(pes, x0, tangent1, rc=vol, callback=callback)
    # print "info=", info

    print "AFTER:"
    callback(x1, map(pes, x1), map(pes.fprime, x1), None)
예제 #2
0
파일: sopt.py 프로젝트: chrinide/pts
def test1(n):
    from numpy import array

    # One   equilibrium  of   Ar4  LJ   cluster  (in   coordinates  of
    # c2v_tetrahedron1 Func):
    w = 0.39685026
    A = array([w, w, +w])

    # Another equilibrium:
    B = array([w, w, -w])

    # Halfway between A and B:
    C = (A + B) / 2.0
    C = array([w + 0.01, w - 0.01, 0.0])

    xs = array([A, C, B])

    from test.testfuns import c2v_tetrahedron1, diagsandhight
    from path import MetricPath
    from metric import Metric
    from numpy import linspace

    z = c2v_tetrahedron1()

    # z = diagsandhight()
    # r = 1.12246195815
    # A = array([r, r,  r / sqrt(2.)])
    # B = array([r, r, -r / sqrt(2.)])
    # C = array([r, r * sqrt(2.), 0.])
    # xs = array([A, C, B])

    p = MetricPath(xs, Metric(z).norm_up)

    x0 = map(p, linspace(0., 1., n))

    from ase import Atoms
    from qfunc import QFunc
    from func import compose

    pes = compose(QFunc(Atoms("Ar4")), z)

    from rc import Volume

    vol = compose(Volume(), z)

    def callback(x, e, g, t):
        # from pts.tools.jmol import jmol_view_path
        print "energies=", e  # map(pes, x)
        print "volume=", map(vol, x)
        # jmol_view_path(map(z, x), syms=["Ar"]*4, refine=1)
        pass

    print "BEFORE:"
    callback(x0, map(pes, x0), map(pes.fprime, x0), None)

    x1, info = soptimize(pes, x0, tangent1, rc=vol, callback=callback)
    # print "info=", info

    print "AFTER:"
    callback(x1, map(pes, x1), map(pes.fprime, x1), None)
예제 #3
0
def fqframe(fileh):
    """Collect what is needed to turn FASTQ records from *fileh* into rows.

    Parameters:
        fileh: handle passed to ``get_fastq``.  It no longer needs a
            ``name`` attribute (the value was read but never used).

    Returns:
        A dict with the keys
        ``'obj_func'``    -- zero-argument callable; each call does
                             ``next()`` on the ``get_fastq(fileh)``
                             iterator (so it raises ``StopIteration``
                             once the iterator is exhausted),
        ``'columns'``     -- tuple of column names,
        ``'getters'``     -- list of one-argument extractors, one per
                             column and in the same order,
        ``'validator'``   -- ``Schema`` giving the expected type of
                             every column,
        ``'dictgetters'`` -- always ``None``.
    """
    final_schema = Schema({
        'id': str,
        'seq': str,
        'quality': str,
        'qual_ints': check_np_type('int64'),
        'error': check_np_type('float64'),
        'description': str,
    })
    columns = ('id', 'seq', 'quality', 'description', 'qual_ints', 'error')

    # np.array of record._per_letter_annotations['phred_quality']
    # (assumes compose_all applies its functions right to left -- TODO
    # confirm); shared by the 'qual_ints' and 'error' getters.
    get_qual_ints = compose_all(np.array, itemgetter('phred_quality'),
                                attr('_per_letter_annotations'))

    # Order must match ``columns``.
    getters = [
        attr('id'),
        compose(str, attr('seq')),
        # NOTE(review): private Biopython helper; may change between
        # Biopython releases.
        SeqIO.QualityIO._get_sanger_quality_str,
        attr('description'),
        get_qual_ints,
        compose(error, get_qual_ints),
    ]
    assert len(getters) == len(columns)

    records = get_fastq(fileh)
    return {
        'obj_func': partial(next, records),
        'columns': columns,
        'getters': getters,
        'validator': final_schema,
        'dictgetters': None
    }
예제 #4
0
def fqframe(fileh):
    """Collect what is needed to turn FASTQ records from *fileh* into rows.

    Parameters:
        fileh: handle passed to ``get_fastq``.  It no longer needs a
            ``name`` attribute (the value was read but never used).

    Returns:
        A dict with the keys
        ``'obj_func'``    -- zero-argument callable; each call does
                             ``next()`` on the ``get_fastq(fileh)``
                             iterator (so it raises ``StopIteration``
                             once the iterator is exhausted),
        ``'columns'``     -- tuple of column names,
        ``'getters'``     -- list of one-argument extractors, one per
                             column and in the same order,
        ``'validator'``   -- ``Schema`` giving the expected type of
                             every column,
        ``'dictgetters'`` -- always ``None``.
    """
    final_schema = Schema({
        'id': str,
        'seq': str,
        'quality': str,
        'qual_ints': check_np_type('int64'),
        'error': check_np_type('float64'),
        'description': str,
    })
    columns = ('id', 'seq', 'quality', 'description', 'qual_ints', 'error')

    # np.array of record._per_letter_annotations['phred_quality']
    # (assumes compose_all applies its functions right to left -- TODO
    # confirm); shared by the 'qual_ints' and 'error' getters.
    get_qual_ints = compose_all(np.array, itemgetter('phred_quality'),
                                attr('_per_letter_annotations'))

    # Order must match ``columns``.
    getters = [
        attr('id'),
        compose(str, attr('seq')),
        # NOTE(review): private Biopython helper; may change between
        # Biopython releases.
        SeqIO.QualityIO._get_sanger_quality_str,
        attr('description'),
        get_qual_ints,
        compose(error, get_qual_ints),
    ]
    assert len(getters) == len(columns)

    records = get_fastq(fileh)
    return {
        'obj_func': partial(next, records),
        'columns': columns,
        'getters': getters,
        'validator': final_schema,
        'dictgetters': None
    }
예제 #5
0
def parse_cigar(cigar_str):
    """Summarise a CIGAR string as per-operation totals plus a score.

    Every ``<count><op>`` pair found in *cigar_str* is added to the total
    of its operation letter; totals are keyed ``'cigar_<op>'``.  The
    entry ``'cigar_score'`` is the sum of all totals except those of
    ``M`` and ``=``.  The two dicts are combined with ``merge_dicts``.
    """
    # The trailing ? keeps the repeated group from being too greedy.
    pattern = re.compile(r'(?:([0-9]+)([MIDNSHPX=]))+?')
    pairs = pattern.findall(cigar_str)  # (count, op) string tuples

    op_letter = itemgetter(1)
    totals = {}
    # groupby needs its input sorted by the grouping key.
    for letter, members in groupby(sorted(pairs, key=op_letter), op_letter):
        totals["cigar_{0}".format(letter)] = sum(
            int(count) for count, _ in members)

    matching = ('cigar_M', 'cigar_=')
    score = sum(total for label, total in totals.items()
                if label not in matching)
    return merge_dicts(totals, {'cigar_score': score})
예제 #6
0
def fqframe(fileh):
    """Return a ``FastqFrame`` namedtuple of two closures over *fileh*.

    ``get_row(record)`` builds and validates the column dict of a single
    record; ``load_fastq()`` maps ``get_row`` over ``get_fastq(fileh)``
    and returns a ``pd.DataFrame`` indexed by ``'id'``.
    """
    # Expected type of every column; checked by get_row for each record.
    final_schema = Schema({
        'id': str,
        'seq': str,
        'quality': str,
        'qual_ints': check_np_type('int64'),
        'error': check_np_type('float64'),
        'description': str
    })

    #get_object = _id
    index = ['id']
    columns = ('id', 'seq', 'quality', 'description', 'qual_ints', 'error')
    SANGER = True
    # NOTE(review): the get_* locals below are not referenced by get_row,
    # which resolves "get_<column>" on the module object instead --
    # presumably module-level getters of the same names exist; confirm.
    get_id = attr('id')
    get_seq = compose(str, attr('seq'))
    get_qual_ints = compose_all(np.array, itemgetter('phred_quality'),
                                attr('_per_letter_annotations'))
    get_description = attr('description')
    get_quality = SeqIO.QualityIO._get_sanger_quality_str
    get_error = compose(error, get_qual_ints)

    #get_error = error_from_ints(get_qual_ints)

    def get_row(record):
        """Return the validated ``{column: value}`` dict for *record*."""
        #record = next(fileh)
        # NOTE(review): prints on every call; looks like debugging output.
        print(get_funcs())
        import sys
        # The module object this code lives in.
        __module__ = sys.modules[__name__]
        # Column name -> attr("get_<column>").
        get_getter = compose(attr, "get_{0}".format)
        _getters = map(get_getter, columns)
        # Presumably applies each attr(...) to the module, yielding the
        # module's get_<column> callables -- depends on apply_each; confirm.
        self_getters = apply_each(
            _getters, __module__)  #fzip(_getters, repeat(__module__, clen))
        results = apply_each(self_getters, record)
        final_dict = dict(zip(columns, results))
        # The return value of validate() is discarded.
        final_schema.validate(final_dict)
        return final_dict

    def load_fastq():
        """Return a DataFrame with one get_row dict per record of fileh."""
        fq = get_fastq(fileh)
        dicts = map(get_row, fq)
        return pd.DataFrame(dicts).set_index(
            index)  #, index=index, columns=columns)

    return namedtuple('FastqFrame', ['get_row', 'load_fastq'])(
        get_row, load_fastq)  #{'get_row' : get_row, 'load_fastq' : load_fastq}
예제 #7
0
def fqframe(fileh):
    """Return a ``FastqFrame`` namedtuple of two closures over *fileh*.

    ``get_row(record)`` builds and validates the column dict of a single
    record; ``load_fastq()`` maps ``get_row`` over ``get_fastq(fileh)``
    and returns a ``pd.DataFrame`` indexed by ``'id'``.
    """
    # Expected type of every column; checked by get_row for each record.
    final_schema =  Schema({
        'id' : str,
        'seq' : str,
        'quality' : str,
        'qual_ints' : check_np_type('int64'),
        'error' : check_np_type('float64'),
        'description' : str
    })

    #get_object = _id
    index = ['id']
    columns = ('id', 'seq', 'quality', 'description', 'qual_ints', 'error')
    SANGER = True
    # NOTE(review): the get_* locals below are not referenced by get_row,
    # which resolves "get_<column>" on the module object instead --
    # presumably module-level getters of the same names exist; confirm.
    get_id = attr('id')
    get_seq= compose(str, attr('seq'))
    get_qual_ints = compose_all(np.array, itemgetter('phred_quality'), attr('_per_letter_annotations'))
    get_description = attr('description')
    get_quality = SeqIO.QualityIO._get_sanger_quality_str
    get_error = compose(error, get_qual_ints)
    #get_error = error_from_ints(get_qual_ints)

    def get_row(record):
        """Return the validated ``{column: value}`` dict for *record*."""
        #record = next(fileh)
        # NOTE(review): prints on every call; looks like debugging output.
        print(get_funcs())
        import sys
        # The module object this code lives in.
        __module__ = sys.modules[__name__]
        # Column name -> attr("get_<column>").
        get_getter = compose(attr, "get_{0}".format)
        _getters = map(get_getter, columns)
        # Presumably applies each attr(...) to the module, yielding the
        # module's get_<column> callables -- depends on apply_each; confirm.
        self_getters = apply_each(_getters, __module__) #fzip(_getters, repeat(__module__, clen))
        results = apply_each(self_getters, record)
        final_dict = dict(zip(columns, results))
        # The return value of validate() is discarded.
        final_schema.validate(final_dict)
        return final_dict

    def load_fastq():
        """Return a DataFrame with one get_row dict per record of fileh."""
        fq = get_fastq(fileh)
        dicts = map(get_row, fq)
        return pd.DataFrame(dicts).set_index(index) #, index=index, columns=columns)

    return namedtuple('FastqFrame', ['get_row', 'load_fastq'])(get_row, load_fastq)#{'get_row' : get_row, 'load_fastq' : load_fastq}
예제 #8
0
 def get_row(record):
     """Return the validated ``{column: value}`` dict for *record*.

     ``columns`` and ``final_schema`` are read from the enclosing scope.
     """
     #record = next(fileh)
     # NOTE(review): prints on every call; looks like debugging output.
     print(get_funcs())
     import sys
     # The module object this code lives in.
     __module__ = sys.modules[__name__]
     # Column name -> attr("get_<column>").
     get_getter = compose(attr, "get_{0}".format)
     _getters = map(get_getter, columns)
     # Presumably applies each attr(...) to the module, yielding the
     # module's get_<column> callables -- depends on apply_each; confirm.
     self_getters = apply_each(_getters, __module__) #fzip(_getters, repeat(__module__, clen))
     results = apply_each(self_getters, record)
     final_dict = dict(zip(columns, results))
     # The return value of validate() is discarded.
     final_schema.validate(final_dict)
     return final_dict
예제 #9
0
 def get_row(record):
     """Return the validated ``{column: value}`` dict for *record*.

     ``columns`` and ``final_schema`` are read from the enclosing scope.
     """
     #record = next(fileh)
     # NOTE(review): prints on every call; looks like debugging output.
     print(get_funcs())
     import sys
     # The module object this code lives in.
     __module__ = sys.modules[__name__]
     # Column name -> attr("get_<column>").
     get_getter = compose(attr, "get_{0}".format)
     _getters = map(get_getter, columns)
     # Presumably applies each attr(...) to the module, yielding the
     # module's get_<column> callables -- depends on apply_each; confirm.
     self_getters = apply_each(
         _getters, __module__)  #fzip(_getters, repeat(__module__, clen))
     results = apply_each(self_getters, record)
     final_dict = dict(zip(columns, results))
     # The return value of validate() is discarded.
     final_schema.validate(final_dict)
     return final_dict
예제 #10
0
from operator import itemgetter
from functools import partial
import operator as op
from operator import add, div
from schema import Schema, Use
from itertools import ifilter
# Parse options
#from pyparsing import Regex

# np.array with dtype fixed to int.
to_np_int = partial(np.array, dtype=int)
# Presumably turns text such as "[1,2,3]" into an int array (strip the
# brackets, split on commas) -- depends on pstrip/psplit/compose_all; confirm.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]'))
tabsplit = psplit('\t')


# minus33(x) == -33 + x
minus33 = partial(add, -33)
# Quality character -> integer score: ord() of the character minus 33
# (assumes compose(f, g) calls g first -- TODO confirm).
qual_int_sanger = compose(minus33, ord)
# Quality string -> int array of per-character scores (assumes pmap(f)
# maps f over its argument -- TODO confirm).
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# Presumably 10 ** (phred / -10.0); assumes partial2 fixes the second
# argument of div -- TODO confirm.
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
sanger_qual_str_to_error = compose(error, qual_to_phreds)

basic_scheme={
    'QNAME' : str,
    'FLAG' : int,
    'RNAME' : str,
    'POS' : int,
    'MAPQ' : int,
    'CIGAR' : str,
    'RNEXT' : str,
    'PNEXT' : int,
예제 #11
0
from operator import methodcaller as mc, ne
import numpy as np
import networkx as nx
from matplotlib import pyplot as plt


def to_adj_list(G, edgekey='weight', bothways=True, as_float=False):
    """Return an iterator over adjacency-list lines for the edges of *G*.

    Parameters:
        G: object whose ``edges(data=True)`` yields ``(u, v, attrs)``
            triples.
        edgekey: name of the edge attribute holding the weight.  The
            key used to be hard-coded to ``'weight'`` regardless of this
            argument; the default keeps the previous behaviour.
        bothways: for integer output, also emit the reversed edge
            ``v->u:w`` (in the same string, after a newline).
        as_float: format weights as floats with three decimals instead
            of integers.  Float output always contains both directions,
            whatever *bothways* says (behaviour kept from the original).

    Self-loops (``u == v``) are dropped and the remaining edges are
    sorted before formatting.

    Raises:
        KeyError: if an edge has no *edgekey* attribute.
    """
    edges = sorted(e for e in G.edges(data=True) if not (e[0] == e[1]))
    convert = float if as_float else int
    # Weights are converted eagerly so a missing key fails at call time.
    triples = [(e[0], e[1], convert(e[-1][edgekey])) for e in edges]
    if as_float:
        form = "{0}->{1}:{2:.3f}\n{1}->{0}:{2:.3f}".format
    elif bothways:
        form = "{0}->{1}:{2}\n{1}->{0}:{2}".format
    else:
        form = "{0}->{1}:{2}".format
    return (form(*triple) for triple in triples)
# Newline-joined string form of to_adj_list's output (assumes compose
# passes all call arguments through to to_adj_list -- TODO confirm).
adj_str = compose('\n'.join, to_adj_list)

def parseM(raw):
    '''Parse and return a whitespace-separated matrix given as text.

    Each non-blank line of *raw* becomes one row of floats.  Lines that
    are empty or contain only whitespace (including a stray "\\r" from
    CRLF input) are skipped; previously only completely empty lines were
    skipped, so a whitespace-only line produced an empty row and a
    ragged matrix.

    Returns an ``np.matrix``.  Raises ``ValueError`` if a token is not a
    valid float.
    '''
    rows = [[float(token) for token in line.split()]
            for line in raw.split('\n') if line.strip()]
    return np.matrix(rows)

def quantify(iterable, pred=bool):
    '''Count how many times the predicate is true.

    Sums ``pred(item)`` over *iterable*.  Taken from the itertools
    recipes: https://docs.python.org/2/library/itertools.html#recipes
    '''
    return sum(pred(item) for item in iterable)

def drawgraph(G, edgekey='weight', big=False, **kwargs):
    if big: fig = plt.figure(figsize = (15, 10))
    pos=nx.spring_layout(G)
    nx.draw_networkx(G, pos=pos, **kwargs)
예제 #12
0
파일: dist.py 프로젝트: averagehat/biolearn
from func import compose, partial2
import numpy as np
from functools import partial
import operator as op
from itertools import islice, ifilter
from fn.iters import splitby
from operator import methodcaller
from func import compose_all, pmap, psplit, _not


# np.linalg.norm taken along axis 1, i.e. one norm per row.
norm_matrix = partial(np.linalg.norm, axis=1)
# dist_matrix(a, b): presumably the row-wise norms of (a - b) -- assumes
# compose forwards both arguments to op.sub; TODO confirm.
dist_matrix = compose(norm_matrix, op.sub)
# Index of the smallest of those distances.
min_dist_position = compose(np.argmin, dist_matrix)

def gravity(M):
    """Return the sum of *M* along axis 0 divided by ``len(M)``."""
    column_totals = M.sum(axis=0)
    row_count = float(len(M))
    return column_totals / row_count

def k_means_cluster(data, centers=None, k=None):
    """Perform one assignment/update step of k-means on *data*.

    Either *centers* or *k* must be given.  Without *centers*, the first
    ``k`` rows of *data* are used as the initial centers.  A *centers*
    array passed by the caller is updated in place.

    Returns ``(centers, data_by_cluster)`` when the updated centers are
    close (``np.allclose``) to the previous ones.

    NOTE(review): in the code visible here the non-converged case falls
    off the end and returns None -- the function may be truncated in
    this view (e.g. a missing recursive call or loop); confirm.
    """
    assert (centers is not None or k is not None)
    if centers is None:
        centers = np.empty((k, data.shape[1]))
        centers[:] = data[:k]
    # Snapshot for the convergence test below.
    old_centers = centers.copy()

    # For every row of data: min_dist_position(centers, row), used below
    # as the index of the cluster the row belongs to.
    mincenters = partial(min_dist_position, centers)
    data_by_cluster = np.apply_along_axis(mincenters, 1, data)
    for i, _ in enumerate(centers):
        cluster = data[data_by_cluster == i]
        # NOTE(review): gravity divides by len(cluster); an empty cluster
        # is not handled here.
        centers[i] = gravity(cluster)
    if np.allclose(old_centers, centers):
        return centers, data_by_cluster
예제 #13
0
def col_compare(df, col, value, comp):
    """Compare *value* with column *col* of *df* using *comp*.

    Returns ``comp(value, df[col])``.  Note the argument order: *value*
    is the left operand and the column the right one, so passing
    ``operator.lt`` selects entries for which ``value < column``.

    The result is the same as the former partial/compose construction,
    written as the single call it amounts to.
    """
    return comp(value, df[col])
예제 #14
0
Join fastq and SAM (merge on QNAME [and SEQ])
Join VCF and SAM (merge on POS)
Pileup
Join VCF and Pileup
'''

'''
pcompose = partial(partial, compose)
error_from_ints = pcompose(error)
#sanger_qual_str_to_error = cmperror(qual_to_phreds)

'''
# SeqIO.parse with the format argument fixed.
get_fastq = partial(SeqIO.parse, format='fastq')
get_fasta = partial(SeqIO.parse, format='fasta')
# np.array with dtype fixed to int.
to_np_int = partial(np.array, dtype=int)
# Presumably the number of 'G'/'C' characters in a sequence -- depends on
# ilen/pifilter; confirm.
gccontent = compose(ilen, pifilter('GC'.__contains__))

# minus33(x) == -33 + x
minus33 = partial(add, -33)
# Quality character -> integer score: ord() of the character minus 33
# (assumes compose(f, g) calls g first -- TODO confirm).
qual_int_sanger = compose(minus33, ord)

''' Error = 10^-(Phred/10) '''
# Quality string -> int array of per-character scores (assumes pmap(f)
# maps f over its argument -- TODO confirm).
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# Presumably 10 ** (phred / -10.0); assumes partial2 fixes the second
# argument of div -- TODO confirm.
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
sanger_qual_str_to_error = compose(error, qual_to_phreds)




#SANGER_OFFSET = 33
예제 #15
0
'''
Join fastq and SAM (merge on QNAME [and SEQ])
Join VCF and SAM (merge on POS)
Pileup
Join VCF and Pileup
'''
'''
pcompose = partial(partial, compose)
error_from_ints = pcompose(error)
#sanger_qual_str_to_error = cmperror(qual_to_phreds)

'''
# SeqIO.parse with the format argument fixed.
get_fastq = partial(SeqIO.parse, format='fastq')
get_fasta = partial(SeqIO.parse, format='fasta')
# np.array with dtype fixed to int.
to_np_int = partial(np.array, dtype=int)
# Presumably the number of 'G'/'C' characters in a sequence -- depends on
# ilen/pifilter; confirm.
gccontent = compose(ilen, pifilter('GC'.__contains__))

# minus33(x) == -33 + x
minus33 = partial(add, -33)
# Quality character -> integer score: ord() of the character minus 33
# (assumes compose(f, g) calls g first -- TODO confirm).
qual_int_sanger = compose(minus33, ord)
''' Error = 10^-(Phred/10) '''
# Quality string -> int array of per-character scores (assumes pmap(f)
# maps f over its argument -- TODO confirm).
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# Presumably 10 ** (phred / -10.0); assumes partial2 fixes the second
# argument of div -- TODO confirm.
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
sanger_qual_str_to_error = compose(error, qual_to_phreds)

#SANGER_OFFSET = 33
'''
assert len(quality) == len(error) == len(phred_scores)
'''
예제 #16
0
0x1  : "template having multiple segments in sequencing",
0x2  : "each segment properly aligned according to the aligner",
0x4  : "segment unmapped",
0x8  : "next segment in the template unmapped",
0x10 : "SEQ being reverse complemented",
0x20 : "SEQ of the next segment in the template being reversed",
0x40 : "the rst segment in the template",
0x80 : "the last segment in the template",
0x100: "secondary alignment",
0x200: "not passing quality controls",
0x400: "PCR or optical duplicate",
0x800: "supplementary alignment"
}


# eval_flag(a, b): truth value of the bitwise AND of a and b (assumes
# compose forwards both arguments to op.and_ -- TODO confirm).
eval_flag = compose(bool, op.and_)

def flag_dict(flag):
    """Map each meaning in ``flag_meanings`` to ``eval_flag(bit, flag)``."""
    decoded = {}
    for bit, meaning in flag_meanings.items():
        decoded[meaning] = eval_flag(bit, flag)
    return decoded
def split_list(A, idx):
    """Cut *A* at position *idx* and return the pair ``(A[:idx], A[idx:])``."""
    head = A[:idx]
    tail = A[idx:]
    return head, tail

# Names of the eleven fixed columns used as keys by line_to_dict below.
sam_columns = ("QNAME", "FLAG", "RNAME", "POS", "MAPQ", "CIGAR", "RNEXT", "PNEXT", "TLEN", "SEQ", "QUAL") #options (NOTE(review): presumably the optional fields after these columns are handled separately -- confirm)


#TODO: get_record function takes a filehandle and returns a single record via SeqIO, etc.
#So functions expect a dictionary I guess
#pass
# Builds a dict from the results of parse_option applied to each item
# (assumes pmap(f) maps f over its argument -- TODO confirm).
parse_options = compose(dict, pmap(parse_option)) #, tabsplit)
#readfields = compose(tabsplit, next)
# Pairs the given field values with sam_columns and builds a dict; zip
# stops at the shorter of the two sequences.
line_to_dict = compose_all(dict, partial(zip, sam_columns)) #, tabsplit)
예제 #17
0
    nx.draw_networkx(G, pos=pos, **kwargs)
    if edgekey:
        edge_labels=dict([((u,v,),d.get(edgekey, ''))
                         for u,v,d in G.edges(data=True)])
        nx.draw_networkx_edge_labels(G,pos,edge_labels=edge_labels)#, **kwargs)
    plt.show()
#NOTE: requires graphviz
    #nx.write_dot(G,'graph.dot')
#dot -Tpng graph.dot > graph.png

def info_fromkmer(kmer):
    """Describe *kmer* as an edge: ``(kmer[:-1], kmer[1:], {'kmer': kmer})``."""
    prefix = kmer[:-1]
    suffix = kmer[1:]
    return prefix, suffix, {'kmer' : kmer}


# Presumably maps info_fromkmer lazily over whatever slider(...) yields;
# make_debruijn calls it as yield_pathgraph(s, k).  Depends on F, compose
# and slider -- TODO confirm.
yield_pathgraph = compose(F(imap, info_fromkmer), slider)
#pathlist = compose(list, yield_pathgraph)
#use reduce
def make_debruijn(s, k=None):
    """Build an ``nx.MultiDiGraph`` whose edges are derived from k-mers.

    With a falsy *k*, every item of *s* is passed to ``info_fromkmer``;
    otherwise the edges come from ``yield_pathgraph(s, k)``.
    """
    G = nx.MultiDiGraph()
    if k:
        # build straight from string
        edge_source = yield_pathgraph(s, k)
    else:
        edge_source = imap(info_fromkmer, s)
    G.add_edges_from(edge_source)
    return G



'''
set v to some random node.
예제 #18
0
def col_compare(df, col, value, comp):
    """Compare *value* with column *col* of *df* using *comp*.

    Returns ``comp(value, df[col])``.  Note the argument order: *value*
    is the left operand and the column the right one, so passing
    ``operator.lt`` selects entries for which ``value < column``.

    The result is the same as the former partial/compose construction,
    written as the single call it amounts to.
    """
    return comp(value, df[col])
예제 #19
0
파일: sopt.py 프로젝트: chrinide/pts
def test(A, B, trafo=None):
    """Optimize chains between A and B on the MB surface for growing n.

    Starting from the two end points, the chain is repeatedly respaced
    to ``n`` vertices along a ``MetricPath``, optimized with
    ``soptimize`` (``tangent4``, at most 20 iterations, ``maxstep=0.1``,
    linear reaction coordinate) and written to ``mb-path.txt-<len(x)>``.
    ``n`` starts at 3 and is replaced by ``2 * n + 1`` after each round
    until it is no longer below ``n_max`` (30).

    If *trafo* is given, ``MB`` is composed with it, the end points are
    mapped with ``trafo.pinv`` and chains are mapped back with ``trafo``
    before being shown.

    NOTE(review): ``array``, ``linspace``, ``soptimize`` and ``tangent4``
    are not imported here -- presumably module-level names; confirm.
    """

    print "A=", A
    print "B=", B

    from pts.pes.mueller_brown import MB
    from pts.pes.mueller_brown import show_chain

    # The initial chain consists of the two end points only:
    x = [A, B]

    # change coordinates:
    if trafo is not None:
        from func import compose
        MB = compose(MB, trafo)
        x = array(map(trafo.pinv, x))

    def show(x):
        # show_chain always receives the chain mapped through trafo
        # when a transformation is in use.
        if trafo is not None:
            show_chain(map(trafo, x))
        else:
            show_chain(x)

    from numpy import savetxt

    def callback(x, e, g, t):
        # Optimizer callback; currently does nothing.
        # savetxt("path.txt", x)
        # print "chain spacing=", spacing(x)
        pass

    from path import MetricPath  # for respacing
    from rc import Linear  # as reaction coordinate
    rcoord = Linear([1., -1.])

    from metric import Metric
    mt = Metric(rcoord)

    n = 3
    n_max = 30
    while True:
        #
        # Respace vertices based on custom metric built from the
        # definition of reaction coordinate:
        #
        p = MetricPath(x, mt.norm_up)
        x = array(map(p, linspace(0., 1., n)))

        print "BEFORE, rc(x)=", map(rcoord, x)
        show(x)

        # x = respace(x, tangent4, spacing)

        # print "RESPACE, x=", x
        # print "spacing(x)=", spacing(x)
        # show(x)

        #       x, stats = soptimize(MB, x, tangent1, spacing, maxit=20, maxstep=0.1, callback=callback)
        #       x, stats = soptimize(MB, x, tangent4, maxit=20, maxstep=0.1, callback=callback)
        x, stats = soptimize(MB,
                             x,
                             tangent4,
                             rc=rcoord,
                             maxit=20,
                             maxstep=0.1,
                             callback=callback)
        # One output file per chain length:
        savetxt("mb-path.txt-" + str(len(x)), x)

        print "AFTER, rc(x)=", map(rcoord, x)
        show(x)

        if n < n_max:
            # double the number of beads:
            n = 2 * n + 1
        else:
            print "========================================================="
            print "Conveged for the maximal tested number of beads: ", n
            print "========================================================="
            break
예제 #20
0
    'bats', 'beagle-lib', 'beast/beast', 'beast/BEASTv1.8.0', 'bio_pieces',
    'blast/blast-2.2.30+', 'bowtie/bowtie-2.2.5', 'bwa/bwa-0.7.12-r1044',
    'cuda/cuda', 'cuda/cuda_6.5.14', 'igv/igv-2.3.37', 'igv/igv-2.3.52',
    'mrsnbactpipeline', 'ngs_mapper/ngs_mapper-1.1',
    'ngs_mapper/ngs_mapper-1.2', 'pathdiscov/pathdiscov-4.2', 'pypbs',
    'usamriidPathDiscov', 'vdbstatus', 'ray/ray-2.3.1', 'roche/analysis',
    'roche/analysis-v2.9', 'roche/gsprocessor-v2.9', 'samtools/samtools-1.1'
]

# Shell script template with the brace placeholders {REFPATH} and
# {SAMPLENAME}; "${SAMPLENAME}" is a literal "$" followed by that same
# placeholder.  NOTE(review): presumably filled in with str.format -- confirm.
ngs_mapper_cmd = '''
cd $PBS_O_WORKDIR
mkdir -p $(pwd)/tmp
SAMPLEDIR=/media/VD_Research/NGSData/ReadsBySample/${SAMPLENAME}
TMPDIR=$(pwd)/tmp runsample.py $SAMPLEDIR {REFPATH} {SAMPLENAME} -od {SAMPLENAME}
'''
# Expands "~" and then resolves the result with os.path.realpath
# (assumes compose(f, g) calls g first -- TODO confirm).
expand_path = compose(os.path.realpath, os.path.expanduser)
''' Tab completion for directories '''


def glob_complete(text, state):
    """Completer callback: return entry number *state* of the matches.

    *text* is expanded with ``expand_path``; when the result is a
    directory a ``'/'`` is appended so that its contents are matched.
    The glob matches are followed by a single ``None``, which is what is
    returned when *state* equals the number of matches.
    """
    prefix = expand_path(text)
    if os.path.isdir(prefix):
        prefix = prefix + '/'
    candidates = glob(prefix + '*')
    candidates.append(None)
    return candidates[state]


# Only space, tab, newline and ';' delimit words for completion.
readline.set_completer_delims(' \t\n;')
readline.parse_and_bind("tab: complete")
readline.set_completer(glob_complete)

# prompt(name): raw_input with "<name>>" as the prompt text (assumes
# compose(f, g) calls g first -- TODO confirm).
prompt = compose(raw_input, "{0}>".format)
예제 #21
0
파일: custom.py 프로젝트: VDBWRAIR/pbs
'pypbs',
'usamriidPathDiscov',
'vdbstatus',
'ray/ray-2.3.1',
'roche/analysis',
'roche/analysis-v2.9',
'roche/gsprocessor-v2.9',
'samtools/samtools-1.1']

# Shell script template with the brace placeholders {REFPATH} and
# {SAMPLENAME}; "${SAMPLENAME}" is a literal "$" followed by that same
# placeholder.  NOTE(review): presumably filled in with str.format -- confirm.
ngs_mapper_cmd = '''
cd $PBS_O_WORKDIR
mkdir -p $(pwd)/tmp
SAMPLEDIR=/media/VD_Research/NGSData/ReadsBySample/${SAMPLENAME}
TMPDIR=$(pwd)/tmp runsample.py $SAMPLEDIR {REFPATH} {SAMPLENAME} -od {SAMPLENAME}
'''
# Expands "~" and then resolves the result with os.path.realpath
# (assumes compose(f, g) calls g first -- TODO confirm).
expand_path = compose(os.path.realpath, os.path.expanduser) 

''' Tab completion for directories ''' 
def glob_complete(text, state):
    """Completer callback: return entry number *state* of the matches.

    *text* is expanded with ``expand_path``; when the result is a
    directory a ``'/'`` is appended so that its contents are matched.
    The glob matches are followed by a single ``None``, which is what is
    returned when *state* equals the number of matches.
    """
    prefix = expand_path(text)
    if os.path.isdir(prefix):
        prefix = prefix + '/'
    candidates = glob(prefix + '*')
    candidates.append(None)
    return candidates[state]

# Only space, tab, newline and ';' delimit words for completion.
readline.set_completer_delims(' \t\n;')
readline.parse_and_bind("tab: complete")
readline.set_completer(glob_complete)

# prompt(name): raw_input with "<name>>" as the prompt text (assumes
# compose(f, g) calls g first -- TODO confirm).
prompt = compose(raw_input, "{0}>".format)
def getvar(varname):
    """Return the environment variable *varname*, asking the user if needed.

    ``prompt(varname)`` is used when the variable is unset or empty.
    """
    from_env = os.environ.get(varname)
    if from_env:
        return from_env
    return prompt(varname)
예제 #22
0
    other_rows = range(0, j) + range(j+1, D.shape[0])
    iks = get_products(other_rows)
    j_parent = partial(parent_len, D, j)
    return min(starmap(j_parent, iks))


from numpy import nan
def test_add_phylo_2D():
    """Check additive_phylo(matrix, 2) on a 3x3 matrix with nan entries."""
    distances = np.array([[0, 3, 5], [3, 0, nan], [5, nan, 0]])
    # The expected text includes the six spaces of indentation that
    # follow each newline.
    expected = ('0->1:3\n'
                '      0->2:5\n'
                '      ')
    actual = additive_phylo(distances, 2)
    assert expected == actual

# filterfst(pred, seq): first item of ifilter(pred, seq); next() raises
# StopIteration when nothing matches (assumes compose forwards both
# arguments to ifilter -- TODO confirm).
filterfst = compose(next, ifilter)
def str_row(D, j):
    """Render row *j* of *D* as newline-joined ``j->column:value`` entries."""
    row = D[j]
    template = str(j) + "->{0}:{1}"
    return '\n'.join(template.format(col, val) for col, val in enumerate(row))

def str_matrix(D):
    """Join ``str_row(D, j)`` for j in ``0 .. D.shape[0] - 1`` with newlines."""
    return '\n'.join(str_row(D, j) for j in xrange(D.shape[0]))

# method that gets node with matching distance
def get_match_dst(D, j, dist):
    """Return the index of the first entry of row *j* of *D* equal to *dist*.

    *dist* must be non-zero (asserted).  The index is the ``argmax`` of
    the boolean comparison, so 0 is returned both for a match at index 0
    and when no entry matches at all.
    """
    assert dist != 0
    matches = D[j] == dist
    return matches.argmax()
예제 #23
0
from functools import partial
import numpy as np
from func import compose

#next argument is the size
# Random integers drawn by np.random.randint(0, 3, size).
simulate_prizedoor = partial(np.random.randint, 0, 3)
# Applies np.random.choice along axis 1 of the array it is given.
random_col_vals = partial(np.apply_along_axis, np.random.choice, 1)
# Guesses come from np.ones, i.e. every guess is 1.0.
simulate_guess = np.ones
# Presumably a random pick among the unmasked entries of a masked array
# (assumes compose(f, g) calls g first -- TODO confirm).
rowchoice = compose(np.random.choice, np.ma.compressed)
# Number of simulated games.
RUNS = 1000


def goat_doors(pzs, gss):
    """Pick, for each of the RUNS columns, a door not named in pzs or gss.

    A 3 x RUNS masked grid of the door numbers 0..2 is built, entries
    equal to the corresponding *pzs* or *gss* value are masked, and
    ``rowchoice`` is applied to every column of the grid.
    """
    doors = np.repeat(np.ma.arange(3), RUNS).reshape(3, RUNS)
    doors.mask = (doors == pzs) | (doors == gss)
    return np.array([rowchoice(column) for column in doors.T])

# Switching reuses goat_doors: sim_game calls switch_guess(gss, goats),
# i.e. it asks for a door that is neither the first guess nor the goat door.
switch_guess = goat_doors

def win_percentage(pzs, gss):
    """Return 100 times the mean of the element-wise ``pzs == gss``."""
    hits = pzs == gss
    return 100 * hits.mean()

def sim_game(switch=False):
    """Simulate RUNS games and return the win percentage.

    With *switch* true the final picks come from ``switch_guess``;
    otherwise the original guesses are kept.
    """
    pzs = simulate_prizedoor(RUNS)
    gss = simulate_guess(RUNS)
    goats = goat_doors(pzs, gss)
    if switch:
        picks = switch_guess(gss, goats)
    else:
        picks = gss
    return win_percentage(picks, pzs)

# Runs at import time: prints the win percentage with switching enabled.
print sim_game(True)
예제 #24
0
파일: sopt.py 프로젝트: alexei-matveev/pts
def test(A, B, trafo=None):
    """Optimize chains between A and B on the MB surface for growing n.

    Starting from the two end points, the chain is repeatedly respaced
    to ``n`` vertices along a ``MetricPath``, optimized with
    ``soptimize`` (``tangent4``, at most 20 iterations, ``maxstep=0.1``,
    linear reaction coordinate) and written to ``mb-path.txt-<len(x)>``.
    ``n`` starts at 3 and is replaced by ``2 * n + 1`` after each round
    until it is no longer below ``n_max`` (30).

    If *trafo* is given, ``MB`` is composed with it, the end points are
    mapped with ``trafo.pinv`` and chains are mapped back with ``trafo``
    before being shown.

    NOTE(review): ``array``, ``linspace``, ``soptimize`` and ``tangent4``
    are not imported here -- presumably module-level names; confirm.
    """

    print "A=", A
    print "B=", B

    from pts.pes.mueller_brown import MB
    from pts.pes.mueller_brown import show_chain

    # The initial chain consists of the two end points only:
    x = [A, B]

    # change coordinates:
    if trafo is not None:
        from func import compose
        MB = compose(MB, trafo)
        x = array(map(trafo.pinv, x))

    def show(x):
        # show_chain always receives the chain mapped through trafo
        # when a transformation is in use.
        if trafo is not None:
            show_chain(map(trafo, x))
        else:
            show_chain(x)

    from numpy import savetxt

    def callback(x, e, g, t):
        # Optimizer callback; currently does nothing.
        # savetxt("path.txt", x)
        # print "chain spacing=", spacing(x)
        pass

    from path import MetricPath # for respacing
    from rc import Linear # as reaction coordinate
    rcoord = Linear([1., -1.])

    from metric import Metric
    mt = Metric(rcoord)

    n = 3
    n_max = 30
    while True:
        #
        # Respace vertices based on custom metric built from the
        # definition of reaction coordinate:
        #
        p = MetricPath(x, mt.norm_up)
        x = array(map(p, linspace(0., 1., n)))

        print "BEFORE, rc(x)=", map(rcoord, x)
        show(x)

        # x = respace(x, tangent4, spacing)

        # print "RESPACE, x=", x
        # print "spacing(x)=", spacing(x)
        # show(x)

#       x, stats = soptimize(MB, x, tangent1, spacing, maxit=20, maxstep=0.1, callback=callback)
#       x, stats = soptimize(MB, x, tangent4, maxit=20, maxstep=0.1, callback=callback)
        x, stats = soptimize(MB, x, tangent4, rc=rcoord, maxit=20, maxstep=0.1, callback=callback)
        # One output file per chain length:
        savetxt("mb-path.txt-" + str(len(x)), x)

        print "AFTER, rc(x)=", map(rcoord, x)
        show(x)

        if n < n_max:
            # double the number of beads:
            n = 2 * n + 1
        else:
            print "========================================================="
            print "Conveged for the maximal tested number of beads: ", n
            print "========================================================="
            break
예제 #25
0
from fn import _, F
from fn.iters import take, accumulate
from utils import slider
from assembly import drawgraph
from numpy import nan

def to_adj_list(G, edgekey='weight', bothways=True, as_float=False):
    """Return an iterator over adjacency-list lines for the edges of *G*.

    Parameters:
        G: object whose ``edges(data=True)`` yields ``(u, v, attrs)``
            triples.
        edgekey: name of the edge attribute holding the weight.  The
            key used to be hard-coded to ``'weight'`` regardless of this
            argument; the default keeps the previous behaviour.
        bothways: for integer output, also emit the reversed edge
            ``v->u:w`` (in the same string, after a newline).
        as_float: format weights as floats with three decimals instead
            of integers.  Float output always contains both directions,
            whatever *bothways* says (behaviour kept from the original).

    Self-loops (``u == v``) are dropped and the remaining edges are
    sorted before formatting.

    Raises:
        KeyError: if an edge has no *edgekey* attribute.
    """
    edges = sorted(e for e in G.edges(data=True) if not (e[0] == e[1]))
    convert = float if as_float else int
    # Weights are converted eagerly so a missing key fails at call time.
    triples = [(e[0], e[1], convert(e[-1][edgekey])) for e in edges]
    if as_float:
        form = "{0}->{1}:{2:.3f}\n{1}->{0}:{2:.3f}".format
    elif bothways:
        form = "{0}->{1}:{2}\n{1}->{0}:{2}".format
    else:
        form = "{0}->{1}:{2}".format
    return (form(*triple) for triple in triples)
# Newline-joined string form of to_adj_list's output (assumes compose
# passes all call arguments through to to_adj_list -- TODO confirm).
adj_str = compose('\n'.join, to_adj_list)

def fst_or_none(func, seq):
    """Return the first item of *seq* for which *func* is true, else None.

    As with ``filter``, ``func=None`` selects the first truthy item.
    The search stops at the first match instead of filtering the whole
    sequence, and it works for any iterable (it does not rely on
    ``filter`` returning an indexable list).
    """
    predicate = bool if func is None else func
    return next((item for item in seq if predicate(item)), None)
# filterfst(pred, seq): first item of ifilter(pred, seq); next() raises
# StopIteration when nothing matches (assumes compose forwards both
# arguments to ifilter -- TODO confirm).
filterfst = compose(next, ifilter)

def nondiag(D, i):
    """Return the indices ``0 .. len(D) - 1`` without *i*, in ascending order.

    The order is now explicit; the former set difference left it to the
    iteration order of a set.
    """
    return [idx for idx in range(len(D)) if idx != i]
# ndiag_perms(D, i): get_products applied to nondiag(D, i) (assumes
# compose forwards both arguments to nondiag -- TODO confirm).
ndiag_perms = compose(get_products, nondiag)

def limbmatch(D, n):
    ''' find nodes i and k such that they satisfie the linear equation:
        D_ik = D_in + D_nk'''
    #print np.isnan(D[n]).all()
    def match(tup):
        i, k = tup
예제 #26
0
파일: sam.py 프로젝트: demis001/biopandas
{
    'A' : chr,
    'i' : int,
    'f' : float,
    'Z' : str,
    'H' : int, # hex
    'B' : parse_array
}

#parse cigar string
# One <count><operation> pair per match; the trailing ? keeps the
# repeated group from matching more than one pair at a time.
cigar_regex = r'(?:([0-9]+)([MIDNSHPX=]))+?'
reg = re.compile(cigar_regex)
# Hard-coded example input; findall yields (count, operation) string pairs.
tups = reg.findall('15S213M23S')
# key selects the operation letter, value the count (value is not used below).
key,value = itemgetter(1), itemgetter(0)
# groupby needs its input sorted by the grouping key; groups is a one-shot
# iterator that is consumed when cigar_dict is built.
groups = groupby(sorted(tups, key=key), key)
# Presumably the per-group counts as ints and their sum (assumes pmap(f)
# maps f over its argument and compose(f, g) calls g first -- TODO confirm).
get_counts = pmap(compose(int, itemgetter(0)))
sum_counts = compose(sum, get_counts)
# Operation letter -> summed count.
cigar_dict = dict( (name, sum_counts(nums)) for name, nums in groups)
# Sum over all operations except M and =.  `key not in 'M='` is a
# substring test on the string 'M='.
mismatches = sum(num for key, num in cigar_dict.items() if key not in 'M=')

#dictmap(compose(sum, get_counts), dict(groups))
#sum(starmap(to_cigar, tups))

#dict(map(reverse, tups))
''' assert sum(itemgetter('M', 'I', 'S', '=', 'X')) == len(seq) == len(quality), \
    "cigar string M/I/S/=/X should sum to the length of the query sequence." '''

#TODO: parse flag
#TODO: handle empty cases (unmapped reads, *)

# NOTE(review): presumably the columns used as the DataFrame index; 'REF'
# is not one of the standard SAM column names -- confirm where it comes from.
index = ['QNAME', 'POS', 'REF']