Ejemplos de uniqify en Python, ejemplos de uniqify.uniqify en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: akshit_gene_positions.py Proyecto: eltanin4/black_queen_critique

def lastReactions(genotype):
    """Return the gene ids whose reactions contribute to non-currency core columns.

    Genes are the entries of ``pos_to_gene_map`` at the positions where
    ``genotype`` exceeds 0.5.  Their reactions are looked up in
    ``KOREF.txt``, restricted to the names present in ``rxns`` and handed to
    ``giveScope``.  Reactions whose ``prodMat`` row has a nonzero sum over
    the non-currency core columns are mapped back to the gene ids listed
    for them in ``KOREF.txt``.

    Args:
        genotype: Array supporting ``genotype > 0.5``; presumably one value
            per gene position (assumption -- confirm against the caller).

    Returns:
        ``uniqify`` applied to the list of matching ``id`` values.
    """
    kos = [pos_to_gene_map[i] for i in np.where((genotype > 0.5) * 1)[0]]

    # `delimiter` is an alias of `sep`; supplying both makes current pandas
    # raise ValueError, so only the tab separator is passed.
    refDF = pd.read_csv('KOREF.txt',
                        sep='\t',
                        header=None,
                        names=['id', 'rid'])

    def give_number(string):
        # Both columns end in a five-character numeric suffix.
        return int(string[-5:])

    refDF['id'] = refDF['id'].apply(give_number)
    refDF['rid'] = refDF['rid'].apply(give_number)

    # Rows of the reference table that belong to the genes present.
    kos_to_rxns = refDF.loc[refDF['id'].isin(kos), :]
    can = kos_to_rxns['rid'].values

    # Rebuild reaction names: 'R' plus the id zero-padded to five characters,
    # keeping only the names that also appear in `rxns`.
    can = ['R' + (5 - len(str(int(e)))) * '0' + str(int(e)) for e in can]
    can = list((set(can)) & set(rxns))
    avrxns = np.array([rxn_kegg_to_id[e] for e in can])

    avScopeRxn = giveScope(rxnMat[avrxns], prodMat[avrxns], seedVec,
                           sumRxnVec[avrxns])[1]

    # Positions (within `avrxns`) where id * scope value is nonzero.
    # NOTE(review): a reaction whose id is 0 can never pass this test --
    # confirm that multiplying by the ids is intended.
    scopeRxns = np.nonzero(avrxns * avScopeRxn)[0]

    cores = [kegg_to_id[c] for c in Core if c not in Currency]

    # NOTE(review): `scopeRxns` holds positions within `avrxns` but indexes
    # `prodMat` rows directly, and the positions of `core_sum` are then
    # applied to `avrxns` -- verify this indexing chain.
    core_sum = np.sum(prodMat[scopeRxns][:, cores], axis=1)
    lasts = [rxn_id_to_kegg[e] for e in avrxns[np.where(core_sum)[0]]]

    # Drop the leading letter to get back the integer ids used in `refDF`.
    mlasts = [int(name[1:]) for name in lasts]
    return uniqify(
        kos_to_rxns.loc[kos_to_rxns['rid'].isin(mlasts)]['id'].tolist())

Ejemplo n.º 2

0

Mostrar archivo

Archivo: akshit_gene_positions.py Proyecto: eltanin4/black_queen_critique

def intReactions(genotype, firsts, lasts):
    """Return gene ids tied to in-scope reactions, excluding `firsts` and `lasts`.

    Genes are the entries of ``pos_to_gene_map`` at the positions where
    ``genotype`` exceeds 0.5.  Their reactions are looked up in
    ``KOREF.txt``, restricted to the names present in ``rxns`` and handed to
    ``giveScope``.  The gene ids of the reactions selected from that result
    are collected, and every ``KOREF.txt`` row for those genes contributes
    its ``id`` to the output unless the id appears in `firsts` or `lasts`.

    Args:
        genotype: Array supporting ``genotype > 0.5``; presumably one value
            per gene position (assumption -- confirm against the caller).
        firsts: Iterable of gene ids to exclude from the result.
        lasts: Iterable of gene ids to exclude from the result.

    Returns:
        A list of gene ids, one entry per matching reference-table row (so
        an id may repeat).
    """
    kos = [pos_to_gene_map[i] for i in np.where((genotype > 0.5) * 1)[0]]

    # `delimiter` is an alias of `sep`; supplying both makes current pandas
    # raise ValueError, so only the tab separator is passed.
    refDF = pd.read_csv('KOREF.txt',
                        sep='\t',
                        header=None,
                        names=['id', 'rid'])

    def give_number(string):
        # Both columns end in a five-character numeric suffix.
        return int(string[-5:])

    refDF['id'] = refDF['id'].apply(give_number)
    refDF['rid'] = refDF['rid'].apply(give_number)

    # Rows of the reference table that belong to the genes present.
    kos_to_rxns = refDF.loc[refDF['id'].isin(kos), :]
    can = kos_to_rxns['rid'].values

    # Rebuild reaction names: 'R' plus the id zero-padded to five characters,
    # keeping only the names that also appear in `rxns`.
    can = ['R' + (5 - len(str(int(e)))) * '0' + str(int(e)) for e in can]
    can = list((set(can)) & set(rxns))
    avrxns = np.array([rxn_kegg_to_id[e] for e in can])

    avScopeRxn = giveScope(rxnMat[avrxns], prodMat[avrxns], seedVec,
                           sumRxnVec[avrxns])[1]

    # Positions (within `avrxns`) where id * scope value is nonzero.
    # NOTE(review): a reaction whose id is 0 can never pass this test --
    # confirm that multiplying by the ids is intended.
    scopeRxns = np.nonzero(avrxns * avScopeRxn)[0]
    ints = [rxn_id_to_kegg[e] for e in avrxns[scopeRxns]]

    # Drop the leading letter to get back the integer ids used in `refDF`.
    mints = [int(name[1:]) for name in ints]
    nkos = uniqify(
        kos_to_rxns.loc[kos_to_rxns['rid'].isin(mints)]['id'].tolist())
    kos_to_rxns = refDF.loc[refDF['id'].isin(nkos), :]

    # Build the exclusion set once instead of concatenating two lists for
    # every candidate id.
    excluded = set(firsts) | set(lasts)
    return [e for e in kos_to_rxns['id'].tolist() if e not in excluded]

Ejemplo n.º 3

0

Mostrar archivo

Archivo: feapi.py Proyecto: null0x4d5a/ir-scripts

def genIndex(host, data):
    """Collect the ``ip`` of `host` from every alert and pass it to ``uniqify``.

    Args:
        host: Key selecting the host record inside each alert entry.
        data: Mapping whose ``"alert"`` entry is a sequence of alert records.

    Returns:
        ``uniqify`` applied to the list of ``alert[host]["ip"]`` values, in
        alert order.
    """
    addresses = [alert[host]["ip"] for alert in data["alert"]]
    return uniqify(addresses)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: malware-tickets.py Proyecto: dpekkarinen/ir-scripts

def genIndex():
    """Collect the ``ip`` under ``sys_type`` from every alert and pass it to ``uniqify``.

    Reads the module-level names ``data`` (its ``"alert"`` sequence) and
    ``sys_type`` (the key selecting the host record in each alert).

    Returns:
        ``uniqify`` applied to the list of ``alert[sys_type]["ip"]`` values,
        in alert order.
    """
    addresses = [alert[sys_type]["ip"] for alert in data["alert"]]
    return uniqify(addresses)

Ejemplo n.º 5

0

Mostrar archivo

def genRandOrg(pathDict):
    """Build a random organism by drawing one pathway per core molecule.

    Args:
        pathDict: Mapping from each entry of ``Core`` to a sequence of
            candidate pathways (each a collection of reaction ids).

    Returns:
        Integer array of the reactions left after ``uniqify`` is applied to
        the flattened concatenation of the drawn pathways.
    """
    # Start from an empty float array so the result keeps the same dtype
    # path as repeated np.append calls on `np.array([])`.
    pieces = [np.array([])]
    for core_molecule in Core:
        # One pathway is drawn at random among those producing this core.
        drawn_path = random.choice(pathDict[core_molecule])
        pieces.append(np.ravel(drawn_path))

    pooled = np.concatenate(pieces)
    return np.array(uniqify(pooled)).astype(int)

Ejemplo n.º 6

0

Mostrar archivo

def propagate_rxns_for_medium(orgrxns, medium):
    """Return the names of the metabolites `giveScope` reports for `orgrxns`.

    Args:
        orgrxns: Reaction indices used to select rows of ``rxnMat``,
            ``prodMat`` and ``sumRxnVec``.
        medium: List of metabolite names; concatenated with ``Currency`` to
            form the seed set.

    Returns:
        ``uniqify`` applied to the ``id_to_kegg`` names of every nonzero
        position in the first element returned by ``giveScope``.
    """
    # Seed vector with one slot per column of rxnMat, switched on for the
    # currency and medium metabolites.
    seeds = np.zeros(len(rxnMat.T))
    seed_ids = [kegg_to_id[met] for met in Currency + medium]
    seeds[seed_ids] = 1

    # Reactions available to this organism.
    own_rxns = orgrxns[:]

    scope_result = giveScope(rxnMat[own_rxns], prodMat[own_rxns], seeds,
                             sumRxnVec[own_rxns])
    reached = np.where(scope_result[0])[0]

    # Translate the in-scope metabolite positions back to their names.
    return uniqify([id_to_kegg[met_id] for met_id in reached])

Ejemplo n.º 7

0

Mostrar archivo

def sortedGenOrg(pathDict, sortFunc=fitCost, optFunc=max):
    """Build an organism by picking, per core molecule, the optimal pathway.

    Args:
        pathDict: Mapping from each entry of ``Core`` to a sequence of
            candidate pathways (each a collection of reaction ids).
        sortFunc: Scoring function applied to every candidate pathway.
        optFunc: Selector applied to the list of scores (``max`` by
            default); the first pathway whose score equals its result wins.

    Returns:
        Integer array of the reactions left after ``uniqify`` is applied to
        the flattened concatenation of the chosen pathways.
    """
    # Start from an empty float array so the result keeps the same dtype
    # path as repeated np.append calls on `np.array([])`.
    pieces = [np.array([])]
    for core_molecule in Core:
        # Score every pathway that produces this core molecule.
        scores = [sortFunc(path) for path in pathDict[core_molecule]]

        # Position of the first pathway reaching the optimal score.
        chosen = scores.index(optFunc(scores))
        pieces.append(np.ravel(pathDict[core_molecule][chosen]))

    pooled = np.concatenate(pieces)
    return np.array(uniqify(pooled)).astype(int)

Ejemplo n.º 8

0

Mostrar archivo

def propagate_single_for_medium(org, medium):
    """Return the names of the metabolites `giveScope` reports for strain `org`.

    Args:
        org: Strain name; its reactions are read as whitespace-separated
            names from ``strain_reactions/<org>.txt``.
        medium: List of metabolite names; concatenated with ``Currency`` to
            form the seed set.

    Returns:
        ``uniqify`` applied to the ``id_to_kegg`` names of every nonzero
        position in the first element returned by ``giveScope``.
    """
    # Defining the seed set to be the medium and the currency.
    seedVec = np.zeros(len(rxnMat.T))
    seedVec[[kegg_to_id[e] for e in Currency + medium]] = 1

    # Getting all the reactions performable by this organism.  The context
    # manager closes the file, which the bare open() call never did.
    with open('strain_reactions/' + org + '.txt', 'r') as rxn_file:
        can = rxn_file.read().split()
    can = list((set(can)) & set(rxns))
    avrxns = [rxn_kegg_to_id[e] for e in can]

    # Calculating the metabolites within the scope of
    # this organism's reaction network.
    scopeMets = giveScope(rxnMat[avrxns], prodMat[avrxns], seedVec,
                          sumRxnVec[avrxns])[0]

    # Translating every in-scope metabolite position back to its name.
    return uniqify([id_to_kegg[e] for e in np.where(scopeMets)[0]])

Ejemplo n.º 9

0

Mostrar archivo

def propagate_core_for_medium(corerxns, medium):
    """Return the names of the metabolites `giveScope` reports for `corerxns`.

    Args:
        corerxns: Iterable of numeric reaction ids; each is turned into a
            name of the form ``'R'`` plus the id zero-padded to five
            characters.
        medium: List of metabolite names; concatenated with ``Currency`` to
            form the seed set.

    Returns:
        ``uniqify`` applied to the ``id_to_kegg`` names of every nonzero
        position in the first element returned by ``giveScope``.
    """
    # Seed vector with one slot per column of rxnMat, switched on for the
    # currency and medium metabolites.
    seeds = np.zeros(len(rxnMat.T))
    seeds[[kegg_to_id[met] for met in Currency + medium]] = 1

    # Reaction names for these ids, limited to the ones known in `rxns`.
    names = ['R' + str(int(rid)).rjust(5, '0') for rid in corerxns]
    usable = list(set(names) & set(rxns))
    rows = [rxn_kegg_to_id[name] for name in usable]

    scope_result = giveScope(rxnMat[rows], prodMat[rows], seeds,
                             sumRxnVec[rows])
    reached = np.where(scope_result[0])[0]

    # Translate the in-scope metabolite positions back to their names.
    return uniqify([id_to_kegg[met_id] for met_id in reached])

Ejemplo n.º 10

0

Mostrar archivo

def core_rxns(strains, CUTOFF=0.95):
    """Return the reaction ids present in at least `CUTOFF` of the strains.

    Args:
        strains: Sequence of strain names; each strain's reactions are read
            as whitespace-separated names from
            ``strain_reactions/<strain>.txt``.
        CUTOFF: Minimum fraction of strains (0-1) that must contain a
            reaction for it to be kept.

    Returns:
        List of reaction ids (values of ``rxn_kegg_to_id``) meeting the
        cutoff, in the order produced by ``uniqify(unlistify(...))``.
    """
    # Loop-invariant: the set of known reaction names.
    known_rxns = set(rxns)

    all_sets = []
    for org in strains:
        # Getting all the reactions performable by this organism.  The
        # context manager closes each file instead of leaking the handle.
        with open('strain_reactions/' + org + '.txt', 'r') as rxn_file:
            can = rxn_file.read().split()
        can = list(set(can) & known_rxns)
        all_sets.append(set([rxn_kegg_to_id[e] for e in can]))

    # Every reaction seen in any strain, once each.
    pangenes = uniqify(unlistify(all_sets))

    core_genes = []
    for gene in pangenes:
        # Number of strains whose reaction set contains this reaction.
        n_present = sum(1 for rxn_set in all_sets if gene in rxn_set)
        if n_present / len(strains) >= CUTOFF:
            core_genes.append(gene)

    return core_genes

Ejemplo n.º 11

0

Mostrar archivo

import requests
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm
import urllib.request
import numpy as np
from uniqify import uniqify
from unlistify import unlistify
import os
import json
from load_kegg import *

# Load the current and the older pangenome tables and derive, from their
# columns, the lists passed through `uniqify`: species (both tables), strain
# names and KEGG abbreviations (current table only).
pangenome_df = pd.read_csv('fuller_pangenome_df.csv')
old_pangenome_df = pd.read_csv('pangenome_df.csv')
species = uniqify(list(pangenome_df['species'].values))
old_species = uniqify(list(old_pangenome_df['species'].values))
all_strains = uniqify(list(pangenome_df['strain'].values))
strain_abbrs = uniqify(list(pangenome_df['kegg_abbr'].values))
# Positions 0..len(all_strains)-1 as an array.
index_array = np.array(list(range(len(all_strains))))

# Getting all SEED organisms.
seed_orgs = pd.read_excel('seed_orgs.xlsx')
list_seed_names = list(seed_orgs['seed_name'].values)
# Text between the first "(" and the first ")" of each SEED name.
# NOTE(review): when a name has no ")", find() returns -1 and the slice
# silently drops the last character instead -- confirm all names are
# parenthesised.
seed_onames = [s[s.find("(")+1:s.find(")")] for s in list_seed_names]
# Maps the extracted short name back to the full SEED name.
smap = dict(zip(seed_onames, list_seed_names))

exact_in_seed_dict = {}
exact_species = {}
for s in list(set(seed_onames) & set(all_strains)):
    exact_in_seed_dict[smap[s]] = pangenome_df.loc[

Ejemplo n.º 12

0

Mostrar archivo

Archivo: rxn_removal_tests.py Proyecto: eltanin4/cross_feeding

from load_data import *
from uniqify import uniqify
from unlistify import unlistify

# For each core molecule, a 0/1 vector marking the rows of prodMat whose
# entry in that molecule's column equals 1.
coreProdRxns = {coreTBP: (prodMat[:, coreTBP] == 1) * 1 for coreTBP in Core}
# One entry per random organism: how many core pathways contained the
# reaction that was picked for removal.
frglList = []
NUM_RAND_ORGS = 1000

for thisIter in tqdm(range(NUM_RAND_ORGS)):
    # Draw one pathway at random for every core molecule.
    orgPathDict = {}
    for coreTBP in Core:
        orgPathDict[coreTBP] = list(
            random.choice(pathDict[coreTBP]).astype(int))

    # Storing the list of reactions.
    orgRxns = np.array(uniqify(unlistify(orgPathDict.values()))).astype(int)

    # Picking one reaction at random and listing every core molecule whose
    # chosen pathway contains it.
    remRxn = np.random.choice(orgRxns)
    thisRxnList = []
    for coreTBP in Core:
        if remRxn in orgPathDict[coreTBP]:
            thisRxnList.append(coreTBP)

    frglList.append(len(thisRxnList))

import matplotlib.pyplot as plt

# Histogram of the counts; each sample is weighted by 1/len(frglList) so
# the bar heights sum to 1.
fig, ax = plt.subplots(1)
myWeights = np.ones_like(frglList) / len(frglList)
ax.hist(frglList, bins=7, color='gray', weights=myWeights)

Ejemplo n.º 13

0

Mostrar archivo

            if quitFlag:
                break
            while True:
                if tTried > 1e5:
                    quitFlag = True
                    break
                tFlag = False
                tTried += 1
                # Generate a random organism.
                while True:
                    orgPathDict = {}
                    for coreTBP in Core:
                        orgPathDict[ coreTBP ] = list( random.choice( pathDict[ coreTBP ] ).astype(int) )
                    
                    # Storing the list of reactions.
                    orgRxns = np.array( uniqify( unlistify( orgPathDict.values() ) ) ).astype( int )

                    # Creating a dictionary of secretions.
                    orgSecDict = {}
                    for coreTBP in Core:
                        orgSecDict[ coreTBP ] = list( np.nonzero( pathwaySecByproducts( 
                                                      orgPathDict[ coreTBP ], orgRxns, 
                                                      rxnMat, prodMat, Core ) )[0] )
                    
                    # Purging duplicates to have only unique byproducts.
                    tempSet = set()
                    fullSet = unlistify( orgSecDict.values() )
                    duplicates = set(x for x in fullSet if x in tempSet or tempSet.add(x))
                    orgSecDict = { coreTBP: list( set( orgSecDict[ coreTBP ] ).difference( duplicates ) ) 
                                   for coreTBP in Core }

Ejemplo n.º 14

0

Mostrar archivo

        tO = anc_recon_table.loc[anc_recon_table['Node'] == node.name[1:-1]]
    else:
        tO = anc_recon_table.loc[anc_recon_table['Node'] == node.name]

    return tO['Prob'].values


# Now traversing the tree and inferring ancestral states for all unmarked nodes.
for thisNode in nodes:
    try:
        thisNode.genotype
    except AttributeError:
        # Only a missing `genotype` attribute means the node is unmarked.
        # Any other exception (including KeyboardInterrupt) now propagates
        # instead of being swallowed by a bare `except:`.
        thisNode.add_feature('genotype',
                             reconAncestor(anc_recon_table, thisNode))

# Sorted list of every gene id found in geneDict's values, then narrowed to
# the positions listed in `good_indices`.
gene_ids = sorted(uniqify(unlistify(list(geneDict.values()))))
gene_ids = list(np.array(gene_ids)[good_indices])


# Using first ancestral genotype inference method to calculate gains and losses.
def giveGainsAndLosses(parent, child):
    gainGenes, lostGenes = set(), set()
    for indx, geneID in enumerate(gene_ids):
        parentProb, childProb = parent.genotype[indx], child.genotype[indx]

        # Order is present, absent, gain and loss.
        prsnProb = parentProb * childProb
        absnProb = (1 - parentProb) * (1 - childProb)
        gainProb = (1 - parentProb) * childProb
        lossProb = parentProb * (1 - childProb)

Ejemplo n.º 15

0

Mostrar archivo

Archivo: views.py Proyecto: SeanConnell/windwatchdb

def _get_unique_slices_list(day):
    """Return the weather time slices of `day` after passing them to ``uniqify``.

    Args:
        day: Value matched against the ``day_of_occurance`` field of
            ``WeatherTimeSlice``.

    Returns:
        ``uniqify`` applied to the filtered queryset with a key function
        that returns each slice's ``id``.
    """
    def slice_id(weather_slice):
        return weather_slice.id

    slices_for_day = WeatherTimeSlice.objects.filter(day_of_occurance=day)
    return uniqify(slices_for_day, slice_id)

Ejemplo n.º 16

0

Mostrar archivo

Archivo: views.py Proyecto: SeanConnell/windwatchdb

def _get_unique_weather_list(site):
    """Return the days of weather for `site` after passing them to ``uniqify``.

    Args:
        site: Value matched against ``relevant_site`` to fetch the single
            ``WeatherWatchQueue`` for the site.

    Returns:
        ``uniqify`` applied to that queue's ``DayOfWeather`` rows, ordered
        by ``date_it_happens``, with a key function that returns each row's
        ``as_machine_timestring()``.
    """
    def machine_timestring(day_of_weather):
        return day_of_weather.as_machine_timestring()

    queue = WeatherWatchQueue.objects.get(relevant_site=site)
    days_in_order = DayOfWeather.objects.filter(
        weather_stream=queue).order_by("date_it_happens")
    return uniqify(days_in_order, machine_timestring)

Ejemplo n.º 17

0

Mostrar archivo

Archivo: bigg_to_kegg.py Proyecto: eltanin4/black_queen_critique

#     urllib.request.urlretrieve( thisURL, saveDir + thisOrg + '.txt' )

# Tracking the biomass reaction name in each organism's reaction set.
saveDir = 'bigg_orgs/'
biomass_url_holder = 'http://bigg.ucsd.edu/api/v2/models/'
org_bm_list = []
for thisOrg in tqdm(org_biggids):
    with open(saveDir + thisOrg + '.txt', 'r') as f:
        # Only the first line of the saved file is searched.
        s = f.readline()
        # First "bigg_id" value on that line that starts with BIOMASS.
        # NOTE(review): raises IndexError when the line has no such id --
        # confirm every saved model contains one.
        thisBMname = re.findall(r'\"bigg_id\": \"(BIOMASS.*?)\"', s)[0]
        org_bm_list.append(thisBMname)
        # thisURL = biomass_url_holder + thisOrg + '/reactions/' + thisBMname
        # urllib.request.urlretrieve(thisURL, saveDir + thisBMname + '.txt')

# Getting the unique list.
org_bm_list = uniqify(org_bm_list)

#-------------------------------------------------------------------------
# Now mapping the metabolite names to possible KEGG IDs.
#-------------------------------------------------------------------------
bmKEGGS = []
for thisBMname in tqdm(org_bm_list):
    thisbm = open(saveDir + thisBMname + '.txt', 'r').readline()
    biggids = re.findall(r'\"bigg_id\": \"(.*?)\"', thisbm)[:-2]
    bmMetsDF = pd.DataFrame({'universal_bigg_id': biggids})
    filt_bigg_df = bigg_df.merge(bmMetsDF, on='universal_bigg_id')
    relLinks = list(filt_bigg_df['database_links'].values)

    # Now identifying all the KEGG IDs.
    for tl in relLinks:
        try:

Ejemplo n.º 18

0

Mostrar archivo

baddeds_all = []
cwastes_all = []
while len(fitterDB) < 500:
    print_progress_bar(len(fitterDB), 500, 'Building mutualisms database')
    while True:
        tFlag = False
        tTried += 1
        # Generate a random organism.
        while True:
            orgPathDict = {}
            for coreTBP in Core:
                orgPathDict[coreTBP] = list(
                    random.choice(pathDict[coreTBP]).astype(int))

            # Storing the list of reactions.
            orgRxns = np.array(uniqify(unlistify(
                orgPathDict.values()))).astype(int)

            # Creating a dictionary of secretions.
            orgSecDict = {}
            for coreTBP in Core:
                orgSecDict[coreTBP] = list(
                    np.nonzero(
                        pathwaySecByproducts(orgPathDict[coreTBP], orgRxns,
                                             rxnMat, prodMat, Core))[0])

            # Purging duplicates to have only unique byproducts.
            tempSet = set()
            fullSet = unlistify(orgSecDict.values())
            duplicates = set(x for x in fullSet
                             if x in tempSet or tempSet.add(x))
            orgSecDict = {

Ejemplo n.º 19

0

Mostrar archivo

Archivo: test_growth.py Proyecto: eltanin4/pangenome_dep

                       'r').readlines()).split()
    can = list((set(can)) & set(rxns))
    avrxns = [rxn_kegg_to_id[e] for e in can]

    # Calculating the metabolites within the scope of
    # this organism's reaction network.
    scopeMets = giveScope(rxnMat[avrxns], prodMat[avrxns], seedVec,
                          sumRxnVec[avrxns])[0]

    # Finding how much of the core is within the network's scope.
    return uniqify([id_to_kegg[e] for e in np.where(scopeMets)[0]])


# Load the pangenome table and derive the species list and the strain list
# (strains are identified by their KEGG abbreviation here).
pangenome_df = pd.read_csv('pangenome_df.csv')
species = uniqify(list(pangenome_df['species'].values))
all_strains = uniqify(list(pangenome_df['kegg_abbr'].values))
# Positions 0..len(all_strains)-1 as an array.
index_array = np.array(list(range(len(all_strains))))

# Generating a vector of all metabolites initially provided, i.e. seeds.
seeds_df = pd.read_csv('../black_queen_critique/seeds_from_vitkup.csv')
media = list(seeds_df['kegg_id'].values)
# Every medium taken on its own, as a 1-tuple.
media_sets = list(itertools.combinations(media, 1))

# Generating the null distribution.
# Getting random pairs.
NUM_SAMPLES = 1000
sample_indices = collect_samples(index_array, 2, NUM_SAMPLES)[:NUM_SAMPLES]
# Translate each sampled group of positions into strain names; iterating
# the samples directly replaces the range(len(...)) index loop.
samples = [[all_strains[j] for j in index_group]
           for index_group in sample_indices]