Example #1
def main():
    from hpf.processing import MultiProcessor
    global _bfp,_db
    
    if opts[SEQUENCES]:
        f = open(opts[SEQUENCES])
        opts[SEQUENCES] = [line.strip().split()[0] for line in f if line.strip() != "" and not line.startswith("#")]
    
    seqs = defaultdict(lambda: [])
    for parent_key,domain_key in sequences(sequence_keys=opts[SEQUENCES], experiment_keys=opts[EXPERIMENT]):
        seqs[parent_key].append(domain_key)
    # The tasks are grouped by parent_sequence_key
    tasks = [(pkey,seqs[pkey]) for pkey in seqs]
    _print(len(tasks)," proteins for processing")

    
    lr,mi,fterms = tuple(metrics())
    print len(fterms), " functions known"
    
    # Be careful to close and serialize the persistent dictionaries
    try:
        _bfp = BayesFunctionPredictionDB(lr,mi, fterms)
        # Open a multi-processor for performing predictions across multiple processes
        pool = MultiProcessor(processors=opts[PROCESSORS], raise_errors=True)
        # Consume the pool generator, ignoring the results
        for r in pool.run(_predict, tasks, result=_upload, batches=100000):
            pass
    finally:
        for metric in (lr,mi):
            if hasattr(metric, 'close'):
                metric.close()
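
The worker `_predict` and the result callback `_upload` are defined elsewhere in this module, as is `hpf.processing.MultiProcessor`. As a rough sketch only, the `run(worker, tasks, result=...)` call shape used throughout these examples can be approximated with the standard multiprocessing module (the helper name `run_tasks` below is hypothetical, not part of hpf):

import multiprocessing

def run_tasks(worker, tasks, result=None, processors=4):
    # Map the worker over the tasks in a process pool; each result comes back
    # to the parent process, where the optional result callback (e.g. an
    # upload function holding a DB connection) is applied to it.
    pool = multiprocessing.Pool(processes=processors)
    try:
        for r in pool.imap_unordered(worker, tasks):
            if result is not None:
                result(r)
            yield r
    finally:
        pool.close()
        pool.join()

The real MultiProcessor also accepts modulus, batches, and raise_errors arguments (seen in the calls above), which this sketch does not model.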
Example #2
def psiblast_fold():
    global db
    if db is None:
        db = _connection()
    cursor = db.cursor()
    try:
        print "PSI-Blast and Fold Recognition Query"
        query = """
            select distinct d.domain_sequence_key, d.parent_sequence_key,
                d.domain_type, i.pdbId, r.chain, n.parent_start, n.parent_stop,
                length(s.sequence)
            from hpf.experiment e
            join hpf.protein p
            on e.id=p.experiment_key
            join hpf.domain d
            on p.sequence_key=d.parent_sequence_key
            join hpf.sequence s
            on d.domain_sequence_key=s.id
            join hpf.pdbSeqRes r
            on substring(d.parent_id FROM 4)=r.sequence_key
            join hpf.domainRegion n
            on d.id=n.domain_key
            join hpf.pdbIndex i
            on r.pdb_key=i.id
            where d.domain_type in ('fold_recognition','psiblast') 
        """

        if opts[EXPERIMENT]:
            query = query+" and e.id in (%s)" % opts[EXPERIMENT]
        print query
        cursor.execute(query)
        tasks = [(domain_key, parent_key, domain_type, pdb_id, chain,
                  int(p_start), int(p_stop), seq_len)
                 for domain_key, parent_key, domain_type, pdb_id, chain,
                     p_start, p_stop, seq_len in cursor.fetchall()]
        keys = {}
        # Cartesian product returns many chains/pdbs for the same sequence
        # Filter these to one pdb/chain per domain by hashing
        for task in tasks:
            domain_key, parent_key, domain_type, pdb_id, chain, p_start, p_stop, seq_len = task
            keys[(domain_key,parent_key)] = task    
        tasks = keys.values()
    finally:
        cursor.close()
        db.close()
        db = None
        cursor = None
    
    global pdb, manager, chain_lengths
    #manager = multiprocessing.Manager()
    #pdb = manager.dict()
    #chain_len = manager.dict()
    pool = MultiProcessor(raise_errors=False, modulus=100,processors=6)
    print "PSI-Blast and Fold Recognition Process"
    results = []
    for result in pool.run(pdb_domain, tasks):
        if isinstance(result, Exception):
            print result
        else:
            results.append(result)
    return results
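
The join against pdbSeqRes returns one row per matching chain, so the same domain can appear several times; the dictionary keyed on (domain_key, parent_key) keeps whichever row is seen last for each pair. A tiny illustration of that dedup idiom with placeholder rows:

rows = [
    ('d1', 'p1', 'psiblast', 'pdbA', 'A', 5, 120, 130),
    ('d1', 'p1', 'psiblast', 'pdbB', 'B', 5, 120, 130),  # same domain, different chain
    ('d2', 'p2', 'fold_recognition', 'pdbC', 'A', 1, 80, 90),
]
unique = {}
for row in rows:
    domain_key, parent_key = row[0], row[1]
    unique[(domain_key, parent_key)] = row
print len(unique), "unique domains"  # -> 2 unique domains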
Example #3
def main():
    func_db = _func()
    func_cursor = func_db.cursor(MySQLdb.cursors.DictCursor)
    query = """
        select b.parent_sequence_key,b.domain_sequence_key,
        b.mf_acc as acc, b.name, b.pls_llr,b.base_llr,b.type,b.timestamp 
        from %s b 
        where pls_llr > 0
        """ % BAYES_TABLE
    print query
    func_cursor.execute(query)
    tasks = func_cursor.fetchall()
    func_cursor.close()
    func_db.close()
    pool = MultiProcessor(processors=8, modulus=100, raise_errors=False)
    pool.run(format, tasks, upload)
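
`format` and `upload` are module-level helpers not shown here. Judging from the DictCursor rows the query produces and the pool.run(format, tasks, upload) call, they plausibly follow the worker/result-callback split sketched below; the field handling and the target table name are assumptions, not the actual hpf code:

def format(row):
    # Worker (child process): turn one DictCursor row into an upload-ready tuple.
    return (row['parent_sequence_key'], row['domain_sequence_key'], row['acc'],
            row['name'], float(row['pls_llr']), float(row['base_llr']), row['type'])

def upload(record):
    # Result callback (parent process): write with its own connection.
    db = _func()
    cursor = db.cursor()
    try:
        cursor.execute(
            "insert into hypothetical_target_table values (%s,%s,%s,%s,%s,%s,%s)",
            record)
        db.commit()
    finally:
        cursor.close()
        db.close()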
Example #4
def main():
    # Get all superfamilies and molecular functions
    global _mf_acc
    with MySQLdb.connect(db="functionTables",passwd="patrick_nyu") as cursor:
        # Only use superfamilies with something in the probability table
        query = """
            select distinct acc 
            from functionTables.probability_goLite_062009 
            where acc like '%.%'
            """
#        query = """select distinct substring_index(sccs,'.',3) 
#            from pdb.astral95_1_75 a 
#            join functionTables.probability_golite_062009 p
#            on substring_index(a.sccs,'.',3)=p.acc"""
        print query
        cursor.execute(query)
        sccs = [t[0] for t in cursor.fetchall()]
        print len(sccs)," superfamilies"
        # Only use molecular functions
        query = """
            select distinct p.acc 
            from functionTables.probability_goLite_062009 p 
            join mygoLite_062009.term t 
            on p.acc=t.acc and t.term_type='molecular_function' and t.acc!='GO:0003674'
            where p.acc2 is NULL
            order by p.metric asc
            """
        print query
        cursor.execute(query)
        _mf_acc = [t[0] for t in cursor.fetchall()]
        print len(_mf_acc)," molecular functions"

    tasks=[]
    for i,sf1 in enumerate(sccs):
        for sf2 in sccs[i+1:]:
            tasks.append((sf1,sf2))
    print len(tasks)," pairwise superfamilies"
            
    global _prob
    print "Opening shelve"
    shelf = shelve.open(opts[SHELVE])
    _prob = Metric(dict=shelf, default=0)
    pool = MultiProcessor(8, modulus=100, raise_errors=True)
    for r in pool.run(__calc_corr, tasks, __upload):
        pass
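
The nested loop over sccs enumerates each unordered superfamily pair exactly once; the same task list can be built more compactly with the standard library:

from itertools import combinations

tasks = list(combinations(sccs, 2))
print len(tasks), " pairwise superfamilies"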