Example #1
#!/usr/bin/env python
"""Remove duplicates in a fasta file"""
import sys
from dp.associations import GeneAssociations
from dp.ontology import Ontology
from collections import Counter
from dp.utils import parseFasta
seqs = set()   # sequences seen so far (for duplicate removal)
names = set()  # names seen so far (for duplicate removal)
fastafile = open(sys.argv[1])

MIN_SEQ_LEN = 32
MAX_SEQ_UNK = 0.1

TAXONS_HOMO_SAPIENS = {9606}  # NCBI taxonomy ID for Homo sapiens
asoc = GeneAssociations.fromFile(sys.argv[2], taxons=TAXONS_HOMO_SAPIENS)
ontology = Ontology(sys.argv[3])
ontology.setAssociations(asoc)
asoc.transitiveClosure()  # presumably propagates each association up the term hierarchy
associated = set()
# Collect all associated gene/protein names, upper-cased for matching
for k, v in asoc.associations.items():
    associated.update({g.upper() for g in v})

# Secondary-structure sequences; keys presumably look like "<ID>:<CHAIN>:secstr" (see sskey below)
ss = dict(parseFasta("data/ss.txt"))
#print(associated)

for l in fastafile:
    # FASTA header line: ">NAME TYPE ..."; the sequence itself is on the following line
    name, typ, *_ = l[1:].split(" ")
    name = name.upper()
    seq = next(fastafile)
    sskey = "%s:secstr" % name.replace("_", ":")
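    # --- Editor's sketch, not in the original (which is truncated here): one plausible
    # --- way the loop could finish, using only the constants and sets defined above.
    seq = seq.strip()
    if len(seq) < MIN_SEQ_LEN:                   # too short
        continue
    if seq.count("X") / len(seq) > MAX_SEQ_UNK:  # too many unknown residues (assumed "X")
        continue
    if name in names or seq in seqs:             # duplicate name or sequence
        continue
    names.add(name)
    seqs.add(seq)
    print(">%s %s" % (name, typ.strip()))
    print(seq)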
Example #2
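    # Note: this fragment comes from inside a larger driver script; it assumes
    # `options` (parsed command-line arguments), `random`, `sys`, `ontology`,
    # `TAXONS` and `associationsFileName` are defined elsewhere in that script.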
    dataset = None
    if options.dataset:
        # FIXME: when the dataset changes, the serialized associations must be regenerated.
        # This is a serious bug unless random is seeded with a constant.
        dataset = [l.strip() for l in open(options.dataset)]
        random.shuffle(dataset)
        #assert options.reserve > 0.0
        #if options.reserve < 1.0: # Use ratio
        #    splitIndex = int(options.reserve * len(dataset))
        #else:
        #    splitIndex = int(options.reserve)
        #reserved = set(dataset[:splitIndex])
        #dataset = set(dataset[splitIndex:])
        dataset = set(dataset)

    associations = GeneAssociations.fromFile(associationsFileName,
                                             taxons=TAXONS,
                                             dataset=dataset)
    #reservedAssociations = GeneAssociations.fromFile(associationsFileName+"_reserved", dataset = reserved)
    ontology.setAssociations(associations)
    #ontology.setAssociations(reservedAssociations, 'reserved')

    if options.associationsDump:
        associations.serialize(options.associationsDump)
        #reservedAssociations.serialize(options.associationsDump+"_reserved")
        sys.exit()

    # Prune the ontology and associations; `options.lb` and `options.max` presumably
    # bound the number of genes per term (lower/upper).
    ontology.deleteSmallTerms(options.lb)
    associations.shrink(options.max, options.lb)

    ontology.overView()
    ontology.dotExport()