def del_dataset(type, name, safe=True):
    """Remove a dataset's registry entry and its on-disk database files.

    Parameters
    ----------
    type : str
        Camoco dataset type (e.g. 'Expr', 'RefGen', 'Ontology').
    name : str
        Name of the dataset to delete.
    safe : bool
        When True, prompt the user for confirmation before deleting.
    """
    c = co.Camoco("Camoco")
    if safe:
        c.log("Are you sure you want to delete {}", name)
        if input("[Y/n]:") != 'Y':
            c.log("Nothing Deleted")
            return
    c.log("Deleting {}", name)
    # Parameterized query instead of str.format: avoids SQL injection via
    # dataset name/type and matches the bound-parameter style used by the
    # insert in Camoco.create.
    c.db.cursor().execute(
        "DELETE FROM datasets WHERE name = ? AND type = ?;", (name, type)
    )
    # Remove both backing files (sqlite + HDF5); a missing file is fine.
    basedir = cf.get('options', 'basedir')
    for ext in ('db', 'hd5'):
        try:
            os.remove(
                os.path.expanduser(os.path.join(
                    basedir, 'databases',
                    '{}.{}.{}'.format(type, name, ext)
                ))
            )
        except FileNotFoundError as e:
            c.log('Database Not Found: {}'.format(e))
    if type == 'Expr':
        # also have to remove the COB specific refgen
        del_dataset('RefGen', 'Filtered' + name, safe=safe)
def _tmpfile(self):
    """Return an open NamedTemporaryFile located under <basedir>/tmp."""
    tmp_dir = os.path.join(cf.get('options', 'basedir'), "tmp")
    return tempfile.NamedTemporaryFile(dir=tmp_dir)
def _database(self, dbname, type=None):
    """Return a database connection for the named dataset.

    When *type* is None the instance's own type is used; passing it
    explicitly lets us grab databases belonging to other dataset types.
    """
    db_type = self.type if type is None else type
    db_path = os.path.expanduser(os.path.join(
        cf.get('options', 'basedir'),
        'databases',
        "{}.{}.db".format(db_type, dbname)
    ))
    return lite.Connection(db_path)
def _hdf5(self, dbname, type=None):
    """Return the pandas HDFStore backing the named dataset.

    When *type* is None the instance's own type is used.
    """
    store_type = type if type is not None else self.type
    hd5_path = os.path.expanduser(os.path.join(
        cf.get('options', 'basedir'),
        'databases',
        "{}.{}.hd5".format(store_type, dbname)
    ))
    return pd.HDFStore(hd5_path)
def ZmRoot(self):
    """Rebuild the ZmRoot COB network from the root FPKM table."""
    co.del_dataset('Expr', 'ZmRoot', safe=False)
    ZM = co.RefGen('Zm5bFGS')
    ZmRoot = co.COB.from_table(
        os.path.join(cf.get('options', 'testdir'),
                     'raw', 'Expression', 'ROOTFPKM.tsv'),
        'ZmRoot',
        'Maize Root Network',
        ZM,
        rawtype='RNASEQ',
        max_gene_missing_data=0.4,
        min_expr=0.1,
        dry_run=False,
        max_val=300
    )
    # Added: the original built the network but never asserted on the
    # result, unlike the other Build*/Zm* test methods in this file.
    self.assertIsInstance(ZmRoot, co.COB)
def ZmSAM(self):
    """Rebuild the ZmSAM COB network from the SAM transcriptome atlas."""
    co.del_dataset('Expr', 'ZmSAM', safe=False)
    ZM = co.RefGen('Zm5bFGS')
    ZmSAM = co.COB.from_table(
        os.path.join(
            cf.get('options', 'testdir'), 'raw', 'Expression',
            'TranscriptomeProfiling_B73_Atlas_SAM_FGS_LiLin_20140316.txt'
        ),
        'ZmSAM',
        # NOTE(review): the description says 'Maize Root Network' for the
        # SAM dataset — looks like a copy-paste slip; confirm before changing.
        'Maize Root Network',
        ZM,
        rawtype='RNASEQ',
        max_gene_missing_data=0.4,
        min_expr=0.1,
        dry_run=False,
        max_val=300
    )
    # Added: the original built the network but never asserted on the
    # result, unlike the other Build*/Zm* test methods in this file.
    self.assertIsInstance(ZmSAM, co.COB)
def ZmRoot(self):
    """Rebuild the ZmRoot co-expression network and verify its type."""
    # Drop any stale copy before rebuilding from scratch.
    co.del_dataset('Expr', 'ZmRoot', safe=False)
    refgen = co.RefGen('Zm5bFGS')
    table_path = os.path.join(
        cf.get('options', 'testdir'), 'raw', 'Expression',
        'RNASEQ', 'ROOTFPKM.tsv'
    )
    network = co.COB.from_table(
        table_path,
        'ZmRoot',
        'Maize Root Network',
        refgen,
        rawtype='RNASEQ',
        max_gene_missing_data=0.3,
        max_accession_missing_data=0.08,
        min_single_sample_expr=1,
        min_expr=0.001,
        quantile=False,
        max_val=300
    )
    self.assertIsInstance(network, co.COB)
def AtLeafHydroIonome(self):
    """Rebuild the AtLeafHydroIonome ontology from per-trait sig-SNP CSVs."""
    co.del_dataset('Ontology', 'AtLeafHydroIonome', safe=False)
    # glob glob is god: collect every per-trait significant-SNP table.
    pattern = os.path.join(
        cf.get('options', 'testdir'),
        'raw', 'GWAS', 'AtLeafHydro',
        '*.sigsnps.csv'
    )
    # Read each table individually, then concat into one GIANT table.
    frames = [pd.read_table(path, sep=',') for path in glob.glob(pattern)]
    df = pd.concat(frames)
    # Normalize chromosome labels to the 'ChrN' convention.
    df.CHR = df.CHR.apply(lambda chrom: 'Chr' + str(chrom))
    # Chase dat refgen.
    T10 = co.RefGen('T10')
    ontology = co.Ontology.from_DataFrame(
        df, 'AtLeafHydroIonome', 'Arabidopsis 1.6M EmmaX GWAS',
        T10, term_col='Trait', chr_col='CHR', pos_col='BP'
    )
    self.assertIsInstance(ontology, co.Ontology)
def ZmPAN(self):
    """Rebuild the ZmPAN expression network from the PAN genome FPKM table."""
    co.del_dataset('Expr', 'ZmPAN', safe=False)
    refgen = co.RefGen('Zm5bFGS')
    fpkm_path = os.path.join(
        cf.get('options', 'testdir'), 'raw', 'Expression', 'RNASEQ',
        'PANGenomeFPKM.txt'
    )
    network = co.COB.from_table(
        fpkm_path,
        'ZmPAN',
        # NOTE(review): description reads 'Maize Root Network' for the PAN
        # dataset — possibly a copy-paste slip; left untouched here.
        'Maize Root Network',
        refgen,
        rawtype='RNASEQ',
        max_gene_missing_data=0.4,
        min_expr=1,
        quantile=False,
        dry_run=False,
        sep=',',
        max_val=300
    )
    self.assertIsInstance(network, co.COB)
def ZmIonome(self):
    """Rebuild the ZmIonome ontology from the combined GWAS SNP table."""
    # Delete the old dataset
    co.del_dataset('Ontology', 'ZmIonome', safe=False)
    # Grab path to the csv
    csv = os.path.join(
        cf.get('options', 'testdir'),
        'raw', 'GWAS', 'Ionome',
        'sigGWASsnpsCombinedIterations.longhorn.allLoc.csv'
    )
    # Define our reference genome
    ZM = co.RefGen('Zm5bFGS')
    # pd.DataFrame.from_csv was deprecated and removed in pandas 1.0;
    # pd.read_csv with index_col=None is the documented replacement.
    df = pd.read_csv(csv, index_col=None)
    # Import class from dataframe
    IONS = co.Ontology.from_DataFrame(
        df, 'ZmIonome', 'Maize Ionome',
        ZM, term_col='el', chr_col='chr', pos_col='pos'
    )
    # Drop the Co59 term (presumably an unwanted element — confirm).
    IONS.del_term('Co59')
    # I guess we need a test in here too
    self.assertIsInstance(IONS, co.Ontology)
def create(cls,name,description,type='Camoco'):
    '''
    Class method: create a new camoco type object. Initializes the base
    directory hierarchy and registers the dataset in the master
    Camoco.Camoco.db database.

    Parameters
    ----------
    name : str
        Dataset name (primary key together with type).
    description : str
        Human-readable description stored in the datasets table.
    type : str
        Dataset type; defaults to 'Camoco'.

    Returns
    -------
    An instance of cls constructed from name.
    '''
    basedir = os.path.realpath(
        os.path.expanduser(cf.get('options','basedir'))
    )
    # Create the basedir if not exists
    try:
        os.makedirs(basedir,exist_ok=True)
        os.makedirs(os.path.join(basedir,"logs"),exist_ok=True)
        os.makedirs(os.path.join(basedir,"databases"),exist_ok=True)
        os.makedirs(os.path.join(basedir,"analyses"),exist_ok=True)
        os.makedirs(os.path.join(basedir,"tmp"),exist_ok=True)
    except Exception as e:
        log(' Could not create files in {}',basedir)
        raise
    try:
        # Create the base camoco database and register this dataset.
        # NOTE(review): a multi-statement SQL string with bindings only
        # works if `lite` is a driver that allows it (e.g. apsw); stdlib
        # sqlite3's Cursor.execute rejects multiple statements — confirm
        # which module `lite` is bound to.
        lite.Connection(
            os.path.join(basedir,'databases','Camoco.Camoco.db')
        ).cursor().execute('''
            CREATE TABLE IF NOT EXISTS datasets (
                name TEXT NOT NULL,
                description TEXT,
                type TEXT,
                added datetime DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY(name,type)
            );
            INSERT OR IGNORE INTO datasets (name,description,type)
                VALUES ('Camoco','Camoco base','Camoco');
            INSERT OR FAIL INTO datasets (name,description,type)
                VALUES (?,?,?)''',(name,description,type)
        )
    # NOTE(review): ConstraintError must be in scope (apsw exposes one;
    # stdlib sqlite3 raises IntegrityError instead) — verify the import.
    except ConstraintError as e:
        # NOTE(review): `log` is called as a function above; `log.warn`
        # only works if log is an object exposing .warn — verify.
        log.warn('CAUTION! {}.{} Database already exists.',name,type)
    self = cls(name)
    return self
def create(cls, name, description, type='Camoco'):
    '''
    Class method: create a new camoco type object. Initializes the base
    directory hierarchy and registers the dataset in the master
    Camoco.Camoco.db database.

    Parameters
    ----------
    name : str
        Dataset name (primary key together with type).
    description : str
        Human-readable description stored in the datasets table.
    type : str
        Dataset type; defaults to 'Camoco'.

    Returns
    -------
    An instance of cls constructed from name.
    '''
    basedir = os.path.realpath(
        os.path.expanduser(cf.get('options', 'basedir')))
    # Create the basedir if not exists
    try:
        os.makedirs(basedir, exist_ok=True)
        os.makedirs(os.path.join(basedir, "logs"), exist_ok=True)
        os.makedirs(os.path.join(basedir, "databases"), exist_ok=True)
        os.makedirs(os.path.join(basedir, "analyses"), exist_ok=True)
        os.makedirs(os.path.join(basedir, "tmp"), exist_ok=True)
    except Exception as e:
        log(' Could not create files in {}', basedir)
        raise
    try:
        # Create the base camoco database and register this dataset.
        # NOTE(review): executing several SQL statements in one execute()
        # call with bindings is driver-dependent (apsw allows it; stdlib
        # sqlite3 does not) — confirm what `lite` refers to.
        lite.Connection(
            os.path.join(basedir, 'databases',
                         'Camoco.Camoco.db')).cursor().execute(
            '''
            CREATE TABLE IF NOT EXISTS datasets (
                name TEXT NOT NULL,
                description TEXT,
                type TEXT,
                added datetime DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY(name,type)
            );
            INSERT OR IGNORE INTO datasets (name,description,type)
                VALUES ('Camoco','Camoco base','Camoco');
            INSERT OR FAIL INTO datasets (name,description,type)
                VALUES (?,?,?)''', (name, description, type))
    # NOTE(review): ConstraintError must be in scope (apsw exposes one;
    # stdlib sqlite3 raises IntegrityError instead) — verify the import.
    except ConstraintError as e:
        # NOTE(review): `log` is called as a function above; `log.warn`
        # only works if log is an object exposing .warn — verify.
        log.warn('CAUTION! {}.{} Database already exists.', name, type)
    self = cls(name)
    return self
def _tmpfile(self):
    # Hand back an open temp-file handle rooted in the basedir's tmp folder.
    base = cf.get('options', 'basedir')
    return tempfile.NamedTemporaryFile(dir=os.path.join(base, "tmp"))
#!/usr/bin/python3

import unittest
import os

import camoco as co
import pandas as pd
from camoco.Config import cf

# Point the base directory at the test directory so test runs never
# touch real datasets.
cf.set('options','basedir', cf.get('options','testdir'))


# write test case to import refgen from GFF
class LocusBase(unittest.TestCase):
    def test_locus_initialization(self):
        # numeric chromosomes
        locus = co.Locus(1, 500)
        self.assertIsInstance(locus, co.Locus)


class RefGenBase(unittest.TestCase):
    def BuildT10(self):
        gff = os.path.join(
            cf.get('options','testdir'), 'raw', 'TAIR10_GFF3_genes.gff'
        )
        co.del_dataset('RefGen', 'T10', safe=False)
        refgen = co.RefGen.from_gff(gff, 'T10', 'Tair 10', '10', 'Arabidopsis')
        self.assertIsInstance(refgen, co.RefGen)

    def BuildZm5bFGS(self):
        gff = os.path.join(
            cf.get('options','testdir'), 'raw', 'ZmB73_5b_FGS.gff'
        )
        co.del_dataset('RefGen', 'Zm5bFGS', safe=False)
        refgen = co.RefGen.from_gff(
            gff, 'Zm5bFGS', 'Maize 5b Filtered Gene Set', '5b', 'Zea Mays'
        )
def _resource(self, type, filename):
    """Expand and return the path <basedir>/<type>/<filename>."""
    path = os.path.join(cf.get('options', 'basedir'), type, filename)
    return os.path.expanduser(path)
def _resource(self, type, filename):
    """Return the user-expanded path to *filename* under <basedir>/<type>."""
    basedir = cf.get('options', 'basedir')
    return os.path.expanduser(os.path.join(basedir, type, filename))
def BuildT10(self):
    """Import the TAIR10 reference genome from its GFF and check the type."""
    gff_path = os.path.join(
        cf.get('options','testdir'), 'raw', 'TAIR10_GFF3_genes.gff'
    )
    # Start from a clean slate before re-importing.
    co.del_dataset('RefGen', 'T10', safe=False)
    refgen = co.RefGen.from_gff(gff_path, 'T10', 'Tair 10', '10', 'Arabidopsis')
    self.assertIsInstance(refgen, co.RefGen)
def BuildIonome(self):
    """Build the ZmIonome ontology from the combined GWAS SNP table."""
    csv = os.path.join(
        cf.get('options','testdir'), 'raw',
        'sigGWASsnpsCombinedIterations.longhorn.allLoc.csv'
    )
    ZM = co.RefGen('Zm5bFGS')
    # pd.DataFrame.from_csv was deprecated and removed in pandas 1.0;
    # pd.read_csv with index_col=None is the documented replacement.
    # (Also dropped a stray trailing semicolon from the original.)
    df = pd.read_csv(csv, index_col=None)
    IONS = co.Ontology.from_DataFrame(
        df, 'ZmIonome', 'Maize Ionome', ZM,
        term_col='el', chr_col='chr', pos_col='pos'
    )
    self.assertIsInstance(IONS, co.Ontology)
def BuildZm5bFGS(self):
    """Import the maize 5b Filtered Gene Set refgen and check the type."""
    gff_path = os.path.join(
        cf.get('options','testdir'), 'raw', 'ZmB73_5b_FGS.gff'
    )
    # Start from a clean slate before re-importing.
    co.del_dataset('RefGen', 'Zm5bFGS', safe=False)
    refgen = co.RefGen.from_gff(
        gff_path, 'Zm5bFGS', 'Maize 5b Filtered Gene Set', '5b', 'Zea Mays'
    )
    self.assertIsInstance(refgen, co.RefGen)