Beispiel #1
0
def del_dataset(type,name,safe=True):
    '''
        Delete a dataset: its row in the `datasets` table and its
        on-disk database files.

        Parameters
        ----------
        type : str
            Dataset type, used in the table row and as the first part of
            the file names ('<type>.<name>.db' / '<type>.<name>.hd5').
        name : str
            Dataset name.
        safe : bool (default: True)
            When True, ask for confirmation on stdin first; anything
            other than an exact 'Y' aborts without deleting.

        Returns
        -------
        None

        Missing files are logged and otherwise ignored. Deleting an
        'Expr' dataset also deletes the 'RefGen' dataset named
        'Filtered<name>', passing the same `safe` flag along.
    '''
    c = co.Camoco("Camoco")
    if safe:
        c.log("Are you sure you want to delete {}",name)
        if input("[Y/n]:") != 'Y':
            c.log("Nothing Deleted")
            return
    c.log("Deleting {}",name)
    # Bind the values instead of formatting them into the statement so
    # that quotes (or SQL) inside name/type cannot alter the query.
    c.db.cursor().execute(
        ''' DELETE FROM datasets WHERE name = ? and type = ?;''',
        (name,type)
    )
    # Both backing files live in <basedir>/databases and only differ by
    # their extension.
    for extension in ('db','hd5'):
        path = os.path.expanduser(os.path.join(
            cf.get('options','basedir'),
            'databases',
            '{}.{}.{}'.format(type,name,extension)
        ))
        try:
            os.remove(path)
        except FileNotFoundError as e:
            c.log('Database Not Found: {}'.format(e))
    if type == 'Expr':
        # also have to remove the COB specific refgen
        del_dataset('RefGen','Filtered'+name,safe=safe)
Beispiel #2
0
 def _tmpfile(self):
     '''
         Return a handle to a new NamedTemporaryFile created inside the
         "tmp" directory under the configured basedir.
     '''
     tmpdir = os.path.join(cf.get('options','basedir'),"tmp")
     return tempfile.NamedTemporaryFile(dir=tmpdir)
Beispiel #3
0
 def _database(self,dbname,type=None):
     '''
         Return a connection to '<basedir>/databases/<type>.<dbname>.db'.

         When `type` is None the instance's own `self.type` is used;
         passing it explicitly lets us grab databases of other types.
     '''
     dbtype = self.type if type is None else type
     basedir = cf.get('options','basedir')
     filename = "{}.{}.db".format(dbtype,dbname)
     location = os.path.join(basedir,'databases',filename)
     return lite.Connection(os.path.expanduser(location))
Beispiel #4
0
 def _hdf5(self,dbname,type=None):
     '''
         Return a pandas HDFStore for
         '<basedir>/databases/<type>.<dbname>.hd5'.

         When `type` is None the instance's own `self.type` is used.
     '''
     store_type = self.type if type is None else type
     basedir = cf.get('options','basedir')
     filename = "{}.{}.hd5".format(store_type,dbname)
     location = os.path.join(basedir,'databases',filename)
     return pd.HDFStore(os.path.expanduser(location))
Beispiel #5
0
 def ZmRoot(self):
     '''
         Rebuild the 'ZmRoot' COB from the raw ROOTFPKM.tsv table in the
         testdir, after deleting any existing 'ZmRoot' Expr dataset.
     '''
     # safe=False: delete without the interactive confirmation
     co.del_dataset('Expr','ZmRoot',safe=False)
     refgen = co.RefGen('Zm5bFGS')
     table = os.path.join(
         cf.get('options','testdir'),
         'raw','Expression','ROOTFPKM.tsv'
     )
     # Build options handed through to COB.from_table as keywords
     options = dict(
         rawtype='RNASEQ',
         max_gene_missing_data=0.4,
         min_expr=0.1,
         dry_run=False,
         max_val=300
     )
     network = co.COB.from_table(
         table,'ZmRoot','Maize Root Network',refgen,**options
     )
Beispiel #6
0
 def ZmSAM(self):
     '''
         Rebuild the 'ZmSAM' COB from the B73 Atlas SAM profiling table
         in the testdir, after deleting any existing 'ZmSAM' Expr dataset.
     '''
     # safe=False: delete without the interactive confirmation
     co.del_dataset('Expr','ZmSAM',safe=False)
     refgen = co.RefGen('Zm5bFGS')
     table = os.path.join(
         cf.get('options','testdir'),
         'raw',
         'Expression',
         'TranscriptomeProfiling_B73_Atlas_SAM_FGS_LiLin_20140316.txt'
     )
     # NOTE(review): the description says 'Maize Root Network' although
     # this dataset is ZmSAM -- confirm whether that is intended.
     options = dict(
         rawtype='RNASEQ',
         max_gene_missing_data=0.4,
         min_expr=0.1,
         dry_run=False,
         max_val=300
     )
     network = co.COB.from_table(
         table,'ZmSAM','Maize Root Network',refgen,**options
     )
Beispiel #7
0
 def ZmRoot(self):
     '''
         Rebuild the 'ZmRoot' COB from the raw RNASEQ ROOTFPKM.tsv table
         in the testdir and check that a co.COB comes back.
     '''
     # safe=False: delete the old dataset without the confirmation prompt
     co.del_dataset('Expr','ZmRoot',safe=False)
     refgen = co.RefGen('Zm5bFGS')
     table = os.path.join(
         cf.get('options','testdir'),
         'raw','Expression','RNASEQ','ROOTFPKM.tsv'
     )
     # Filtering / normalization options handed to COB.from_table
     options = dict(
         rawtype='RNASEQ',
         max_gene_missing_data=0.3,
         max_accession_missing_data=0.08,
         min_single_sample_expr=1,
         min_expr=0.001,
         quantile=False,
         max_val=300
     )
     network = co.COB.from_table(
         table,'ZmRoot','Maize Root Network',refgen,**options
     )
     self.assertIsInstance(network,co.COB)
Beispiel #8
0
 def AtLeafHydroIonome(self):
     '''
         Rebuild the 'AtLeafHydroIonome' Ontology from every
         '*.sigsnps.csv' file under raw/GWAS/AtLeafHydro in the testdir
         and check that a co.Ontology comes back.
     '''
     # safe=False: delete the old dataset without the confirmation prompt
     co.del_dataset('Ontology','AtLeafHydroIonome',safe=False)
     pattern = os.path.join(
         cf.get('options','testdir'),
         'raw','GWAS','AtLeafHydro',
         '*.sigsnps.csv'
     )
     # One frame per matching csv, stacked into a single table
     frames = [pd.read_table(path,sep=',') for path in glob.glob(pattern)]
     df = pd.concat(frames)

     def with_chr_prefix(chrom):
         # Prefix the chromosome value with 'Chr'
         return 'Chr'+str(chrom)

     df.CHR = df.CHR.apply(with_chr_prefix)
     refgen = co.RefGen('T10')
     # Import class from dataframe
     ontology = co.Ontology.from_DataFrame(
         df,
         'AtLeafHydroIonome',
         'Arabidopsis 1.6M EmmaX GWAS',
         refgen,
         term_col='Trait',
         chr_col='CHR',
         pos_col='BP'
     )
     self.assertIsInstance(ontology,co.Ontology)
Beispiel #9
0
 def ZmPAN(self):
     '''
         Rebuild the 'ZmPAN' COB from the comma separated
         PANGenomeFPKM.txt table in the testdir and check that a co.COB
         comes back.
     '''
     # safe=False: delete the old dataset without the confirmation prompt
     co.del_dataset('Expr','ZmPAN',safe=False)
     refgen = co.RefGen('Zm5bFGS')
     table = os.path.join(
         cf.get('options','testdir'),
         'raw','Expression','RNASEQ','PANGenomeFPKM.txt'
     )
     # Options handed through to COB.from_table as keywords
     options = dict(
         rawtype='RNASEQ',
         max_gene_missing_data=0.4,
         min_expr=1,
         quantile=False,
         dry_run=False,
         sep=',',
         max_val=300
     )
     network = co.COB.from_table(
         table,'ZmPAN','Maize Root Network',refgen,**options
     )
     self.assertIsInstance(network,co.COB)
Beispiel #10
0
 def ZmIonome(self):
     '''
         Rebuild the 'ZmIonome' Ontology from the combined GWAS SNP csv
         in the testdir, drop the 'Co59' term and check that a
         co.Ontology comes back.
     '''
     # Delete the old dataset
     co.del_dataset('Ontology','ZmIonome',safe=False)
     # Grab the path to the csv
     csv = os.path.join(
         cf.get('options','testdir'),
         'raw','GWAS','Ionome',
         'sigGWASsnpsCombinedIterations.longhorn.allLoc.csv'
     )
     # Define our reference genome
     ZM = co.RefGen('Zm5bFGS')
     # DataFrame.from_csv was deprecated and later removed from pandas;
     # read_csv is the supported reader. index_col=None keeps every
     # column as data, as before.
     df = pd.read_csv(csv,index_col=None)
     # Import class from dataframe
     IONS  = co.Ontology.from_DataFrame(
         df,'ZmIonome','Maize Ionome',
         ZM,term_col='el',chr_col='chr',pos_col='pos'
     )
     IONS.del_term('Co59')
     # I guess we need a test in here too
     self.assertIsInstance(IONS,co.Ontology)
Beispiel #11
0
    def create(cls,name,description,type='Camoco'):
        '''
            Create a new camoco type object (written to be used as a
            class method: the first argument is the class itself).

            Makes sure the base directory and its logs/databases/
            analyses/tmp sub-directories exist, makes sure the base
            Camoco database has a `datasets` table, registers
            (name,description,type) in it and returns `cls(name)`.

            A failure while creating directories is logged and re-raised.
            A ConstraintError from the database is logged as a warning
            and otherwise ignored.
        '''
        basedir = os.path.realpath(
            os.path.expanduser(cf.get('options','basedir'))
        )
        # Create the basedir and its sub-directories if they do not exist
        try:
            os.makedirs(basedir,exist_ok=True)
            for subdir in ("logs","databases","analyses","tmp"):
                os.makedirs(os.path.join(basedir,subdir),exist_ok=True)
        except Exception:
            log(' Could not create files in {}',basedir)
            raise
        # Statements run against the base camoco database; the trailing
        # INSERT takes the (name,description,type) bindings.
        setup_sql = ''' 
                CREATE TABLE IF NOT EXISTS datasets (
                    name TEXT NOT NULL,
                    description TEXT,
                    type TEXT,
                    added datetime DEFAULT CURRENT_TIMESTAMP,
                    PRIMARY KEY(name,type)
                );
                INSERT OR IGNORE INTO datasets (name,description,type)
                VALUES ('Camoco','Camoco base','Camoco');
                INSERT OR FAIL INTO datasets (name,description,type)
                VALUES (?,?,?)'''
        try:
            lite.Connection(
                os.path.join(basedir,'databases','Camoco.Camoco.db')
            ).cursor().execute(setup_sql,(name,description,type))
        except ConstraintError:
            log.warn('CAUTION! {}.{} Database already exists.',name,type)
        return cls(name)
Beispiel #12
0
    def create(cls, name, description, type='Camoco'):
        '''
            Create a new camoco type object (written to be used as a
            class method: the first argument is the class itself).

            Makes sure the base directory and its logs/databases/
            analyses/tmp sub-directories exist, makes sure the base
            Camoco database has a `datasets` table, registers
            (name, description, type) in it and returns `cls(name)`.

            A failure while creating directories is logged and re-raised.
            A ConstraintError from the database is logged as a warning
            and otherwise ignored.
        '''
        configured = cf.get('options', 'basedir')
        basedir = os.path.realpath(os.path.expanduser(configured))
        # Create the basedir and its sub-directories if they do not exist
        try:
            os.makedirs(basedir, exist_ok=True)
            for subdir in ("logs", "databases", "analyses", "tmp"):
                os.makedirs(os.path.join(basedir, subdir), exist_ok=True)
        except Exception:
            log(' Could not create files in {}', basedir)
            raise
        # Statements run against the base camoco database; the trailing
        # INSERT takes the (name, description, type) bindings.
        setup_sql = ''' 
                CREATE TABLE IF NOT EXISTS datasets (
                    name TEXT NOT NULL,
                    description TEXT,
                    type TEXT,
                    added datetime DEFAULT CURRENT_TIMESTAMP,
                    PRIMARY KEY(name,type)
                );
                INSERT OR IGNORE INTO datasets (name,description,type)
                VALUES ('Camoco','Camoco base','Camoco');
                INSERT OR FAIL INTO datasets (name,description,type)
                VALUES (?,?,?)'''
        try:
            lite.Connection(
                os.path.join(basedir, 'databases', 'Camoco.Camoco.db')
            ).cursor().execute(setup_sql, (name, description, type))
        except ConstraintError:
            log.warn('CAUTION! {}.{} Database already exists.', name, type)
        return cls(name)
Beispiel #13
0
 def _tmpfile(self):
     '''
         Return a handle to a new NamedTemporaryFile created inside the
         "tmp" directory under the configured basedir.
     '''
     basedir = cf.get('options', 'basedir')
     tmpdir = os.path.join(basedir, "tmp")
     return tempfile.NamedTemporaryFile(dir=tmpdir)
Beispiel #14
0
#!/usr/bin/python3

import unittest
import os

import camoco as co
import pandas as pd
from camoco.Config import cf

# Set the basedir option to the value of the testdir option, so that
# everything below that reads 'basedir' gets the test directory.
cf.set('options','basedir', cf.get('options','testdir'))

# write test case to import refgen from GFF

class LocusBase(unittest.TestCase):
    '''Basic checks for co.Locus.'''

    def test_locus_initialization(self):
        '''A Locus can be created with a numeric chromosome.'''
        locus = co.Locus(1,500)
        self.assertIsInstance(locus,co.Locus)

class RefGenBase(unittest.TestCase):
    '''Builders that import reference genomes from GFF files in the testdir.'''

    def BuildT10(self):
        '''Rebuild the 'T10' RefGen from the TAIR10 GFF and check its type.'''
        gff = os.path.join(cf.get('options','testdir'),'raw','TAIR10_GFF3_genes.gff')
        # safe=False: delete the old dataset without the confirmation prompt
        co.del_dataset('RefGen','T10',safe=False)
        T10 = co.RefGen.from_gff(gff,'T10','Tair 10','10','Arabidopsis')
        self.assertIsInstance(T10,co.RefGen)

    def BuildZm5bFGS(self):
        '''Rebuild the 'Zm5bFGS' RefGen from the ZmB73 5b GFF and check its type.'''
        gff = os.path.join(cf.get('options','testdir'),'raw','ZmB73_5b_FGS.gff')
        # safe=False: delete the old dataset without the confirmation prompt
        co.del_dataset('RefGen','Zm5bFGS',safe=False)
        ZM = co.RefGen.from_gff(gff,'Zm5bFGS','Maize 5b Filtered Gene Set','5b','Zea Mays')
        # Check the result like BuildT10 does, instead of discarding it
        self.assertIsInstance(ZM,co.RefGen)
Beispiel #15
0
 def _resource(self, type, filename):
     '''
         Return the user-expanded path '<basedir>/<type>/<filename>'.
     '''
     location = os.path.join(cf.get('options', 'basedir'), type, filename)
     return os.path.expanduser(location)
Beispiel #16
0
 def _resource(self,type,filename):
     '''
         Return the user-expanded path '<basedir>/<type>/<filename>'.
     '''
     basedir = cf.get('options','basedir')
     location = os.path.join(basedir,type,filename)
     return os.path.expanduser(location)
Beispiel #17
0
 def BuildT10(self):
     '''
         Rebuild the 'T10' RefGen from the TAIR10 GFF in the testdir and
         check that a co.RefGen comes back.
     '''
     gff_path = os.path.join(
         cf.get('options','testdir'),
         'raw',
         'TAIR10_GFF3_genes.gff'
     )
     # safe=False: delete the old dataset without the confirmation prompt
     co.del_dataset('RefGen','T10',safe=False)
     refgen = co.RefGen.from_gff(
         gff_path,'T10','Tair 10','10','Arabidopsis'
     )
     self.assertIsInstance(refgen,co.RefGen)
Beispiel #18
0
 def BuildIonome(self):
     '''
         Build the 'ZmIonome' Ontology from the combined GWAS SNP csv in
         the testdir and check that a co.Ontology comes back.
     '''
     csv = os.path.join(cf.get('options','testdir'),'raw','sigGWASsnpsCombinedIterations.longhorn.allLoc.csv')
     ZM = co.RefGen('Zm5bFGS')
     # DataFrame.from_csv was deprecated and later removed from pandas;
     # read_csv is the supported reader. index_col=None keeps every
     # column as data, as before.
     df = pd.read_csv(csv,index_col=None)
     IONS  = co.Ontology.from_DataFrame(df,'ZmIonome','Maize Ionome',ZM,term_col='el',chr_col='chr',pos_col='pos')
     self.assertIsInstance(IONS,co.Ontology)
Beispiel #19
0
 def BuildZm5bFGS(self):
     '''
         Rebuild the 'Zm5bFGS' RefGen from the ZmB73 5b GFF in the
         testdir and check that a co.RefGen comes back.
     '''
     gff_path = os.path.join(
         cf.get('options','testdir'),
         'raw',
         'ZmB73_5b_FGS.gff'
     )
     # safe=False: delete the old dataset without the confirmation prompt
     co.del_dataset('RefGen','Zm5bFGS',safe=False)
     refgen = co.RefGen.from_gff(
         gff_path,'Zm5bFGS','Maize 5b Filtered Gene Set','5b','Zea Mays'
     )
     self.assertIsInstance(refgen,co.RefGen)