def simpleCohort():
    '''
    Build and return a Cohort named 'TestCohort' holding four accessions.

    Two accessions are typed 'WGS' (Sample1, Sample2) and two 'CHIP'
    (Sample3, Sample4); each lists the same two file names.
    '''
    # Runs before anything is built -- presumably clears a leftover
    # 'TestCohort' from an earlier run (confirm against `delete`).
    delete('Cohort', 'TestCohort', force=True)
    sample_types = (
        ('Sample1', 'WGS'),
        ('Sample2', 'WGS'),
        ('Sample3', 'CHIP'),
        ('Sample4', 'CHIP'),
    )
    # Every accession gets its own fresh file list.
    members = [
        Accession(sample, files=['file1.txt', 'file2.txt'], type=kind)
        for sample, kind in sample_types
    ]
    cohort = Cohort('TestCohort')
    for member in members:
        cohort.add_accession(member)
    return cohort
def simpleCohort():
    """
    Build and return a Cohort named "TestCohort" holding four accessions.

    Sample1 and Sample2 are typed "WGS", Sample3 and Sample4 are typed
    "CHIP"; each lists the same two file names.
    """
    # Runs before anything is built -- presumably clears a leftover
    # "TestCohort" from an earlier run (confirm against `delete`).
    delete("Cohort", "TestCohort", force=True)
    sample_types = (
        ("Sample1", "WGS"),
        ("Sample2", "WGS"),
        ("Sample3", "CHIP"),
        ("Sample4", "CHIP"),
    )
    # Every accession gets its own fresh file list.
    members = [
        Accession(sample, files=["file1.txt", "file2.txt"], type=kind)
        for sample, kind in sample_types
    ]
    cohort = Cohort("TestCohort")
    for member in members:
        cohort.add_accession(member)
    return cohort
def simpleCohort():
    """
    Yield a Cohort named "TestCohort" holding four accessions, then clean up.

    Setup calls ``FreezableAPI.delete("Cohort", "TestCohort")`` and builds
    the cohort (two "WGS" and two "CHIP" accessions). Once the generator is
    resumed after the yield, the cohort is deleted again using its own
    ``m80.dtype`` and ``m80.name``.
    """
    # Presumably removes a leftover cohort from an earlier run -- confirm.
    FreezableAPI.delete("Cohort", "TestCohort")
    sample_types = (
        ("Sample1", "WGS"),
        ("Sample2", "WGS"),
        ("Sample3", "CHIP"),
        ("Sample4", "CHIP"),
    )
    # Every accession gets its own fresh file list.
    members = [
        Accession(sample, files=["file1.txt", "file2.txt"], type=kind)
        for sample, kind in sample_types
    ]
    cohort = Cohort("TestCohort")
    for member in members:
        cohort.add_accession(member)
    yield cohort
    # Teardown: executed when the consumer resumes the generator.
    FreezableAPI.delete(cohort.m80.dtype, cohort.m80.name)
def test_add_accession(simpleCohort):
    '''Adding an accession that is not in the cohort grows it by one.'''
    sample = Accession('Sample4', files=['file1.txt', 'file2.txt'], type='CHIP')
    # Make sure the accession is absent so the add is a real insertion.
    if sample in simpleCohort:
        del simpleCohort[sample]
    expected_len = len(simpleCohort) + 1
    simpleCohort.add_accession(sample)
    assert len(simpleCohort) == expected_len
def test_add_accession(simpleCohort):
    """Adding an accession that is not in the cohort grows it by one."""
    sample = Accession("Sample4", files=["file1.txt", "file2.txt"], type="CHIP")
    # Make sure the accession is absent so the add is a real insertion.
    if sample in simpleCohort:
        del simpleCohort[sample]
    expected_len = len(simpleCohort) + 1
    simpleCohort.add_accession(sample)
    assert len(simpleCohort) == expected_len
def test_delitem(simpleCohort):
    """Deleting an accession by name shrinks the cohort by one."""
    throwaway = Accession(
        "TESTSAMPLE_IGNORE",
        files=["file1.txt", "file2.txt"],
        type="CHIP",
    )
    # Make sure the accession is present so there is something to delete.
    if throwaway not in simpleCohort:
        simpleCohort.add_accession(throwaway)
    expected_len = len(simpleCohort) - 1
    del simpleCohort["TESTSAMPLE_IGNORE"]
    assert len(simpleCohort) == expected_len
def __getitem__(self, name):
    '''
    Look up an accession in the database and rebuild it as an Accession.

    Parameters
    ----------
    name : object
        Anything `self._get_AID` can resolve to an AID. Per the original
        documentation this may be a name or alias string, an Accession
        instance, or the internal accession ID (AID) itself.

    Returns
    -------
    Accession
        Built from the stored name, the stored file urls, and the stored
        metadata key/value pairs plus an extra 'AID' entry.
    '''
    AID = self._get_AID(name)
    cursor = self._db.cursor()
    # Resolve the stored name from the AID; the row has a single column.
    name_row = cursor.execute(
        'SELECT name FROM accessions WHERE AID = ?', (AID, )
    ).fetchone()
    (name,) = name_row
    # Collect the metadata key/value pairs recorded for this accession.
    metadata = {}
    meta_rows = cursor.execute(
        ''' SELECT key, val FROM metadata WHERE AID = ?; ''', (AID, )
    ).fetchall()
    for key, val in meta_rows:
        metadata[key] = val
    metadata['AID'] = AID
    # Collect the file urls recorded for this accession.
    files = []
    file_rows = cursor.execute(
        ''' SELECT url FROM files WHERE AID = ?; ''', (AID, )
    ).fetchall()
    for file_row in file_rows:
        files.append(file_row[0])
    return Accession(name, files=files, **metadata)
def test_delitem(simpleCohort):
    '''Deleting an accession by name shrinks the cohort by one.'''
    throwaway = Accession(
        'TESTSAMPLE_IGNORE',
        files=['file1.txt', 'file2.txt'],
        type='CHIP',
    )
    # Make sure the accession is present so there is something to delete.
    if throwaway not in simpleCohort:
        simpleCohort.add_accession(throwaway)
    expected_len = len(simpleCohort) - 1
    del simpleCohort['TESTSAMPLE_IGNORE']
    assert len(simpleCohort) == expected_len
def RNAAccession2():
    '''Return an 'RNASeq' Accession named 'RNAAccession2' listing four fastq paths.'''
    # R1/R2 files for lanes L005 and L006 of Sample2.
    fastq_paths = [
        './data/Sample2_ATGTCA_L005_R1_001.fastq',
        './data/Sample2_ATGTCA_L005_R2_001.fastq',
        './data/Sample2_ATGTCA_L006_R1_001.fastq',
        './data/Sample2_ATGTCA_L006_R2_001.fastq',
    ]
    return Accession('RNAAccession2', files=fastq_paths, type='RNASeq')
def RNAAccession1():
    '''Return an 'RNASeq' Accession named 'RNAAccession1' listing four fastq paths.'''
    # R1/R2 files for lanes L007 and L008 of Sample1.
    fastq_paths = [
        './data/Sample1_ATGTCA_L007_R1_001.fastq',
        './data/Sample1_ATGTCA_L007_R2_001.fastq',
        './data/Sample1_ATGTCA_L008_R1_001.fastq',
        './data/Sample1_ATGTCA_L008_R2_001.fastq',
    ]
    return Accession('RNAAccession1', files=fastq_paths, type='RNASeq')
def RNAAccession2():
    """Return an "RNASeq" Accession named "RNAAccession2" listing four fastq paths."""
    # R1/R2 files for lanes L005 and L006 of Sample2.
    fastq_paths = [
        "./data/Sample2_ATGTCA_L005_R1_001.fastq",
        "./data/Sample2_ATGTCA_L005_R2_001.fastq",
        "./data/Sample2_ATGTCA_L006_R1_001.fastq",
        "./data/Sample2_ATGTCA_L006_R2_001.fastq",
    ]
    return Accession("RNAAccession2", files=fastq_paths, type="RNASeq")
def RNAAccession1():
    """Return an "RNASeq" Accession named "RNAAccession1" listing four fastq paths."""
    # R1/R2 files for lanes L007 and L008 of Sample1.
    fastq_paths = [
        "./data/Sample1_ATGTCA_L007_R1_001.fastq",
        "./data/Sample1_ATGTCA_L007_R2_001.fastq",
        "./data/Sample1_ATGTCA_L008_R1_001.fastq",
        "./data/Sample1_ATGTCA_L008_R2_001.fastq",
    ]
    return Accession("RNAAccession1", files=fastq_paths, type="RNASeq")
def add_accessions_from_data_frame(self, df, name_col):
    '''
    Add accessions from a data frame.

    Each row is treated as one Accession; every column other than
    `name_col` becomes a property of that accession. Rows whose
    `name_col` value is null are skipped. Numeric NaN property values
    are dropped, and all remaining property values are converted to
    strings. The accessions are created without files and handed to
    `self.add_accessions` in a single call.

    Parameters
    ----------
    df : pandas.DataFrame
        The pandas data frame containing one accession per row
    name_col : string
        The column containing the accession names

    Raises
    ------
    ValueError
        If `name_col` is not one of the columns of `df`.

    Example
    -------
    >>> df = pd.DataFrame(
            [['S1', 23, 'O'],
             ['S2', 30, 'O+']],
            columns=['Name', 'Age', 'Type']
        )
    >>> cohort.add_accessions_from_data_frame(df, 'Name')

    Would add two Accessions: S1 and S2 with Age and Type
    properties.
    '''
    if name_col not in df.columns:
        raise ValueError(f'{name_col} is not a valid column name')
    # Filter out rows with null name_col values
    # (the tilde operator is a boolean inversion).
    df = df.loc[~df[name_col].isnull(), :]
    accessions = []
    # Iterate over the rows and create an accession from each one
    for _, row in df.iterrows():
        props = dict(row)
        name = props.pop(name_col)
        # Get rid of missing data; iterate over a snapshot because
        # entries are deleted from the dict while looping.
        for key, val in list(props.items()):
            if isinstance(val, numbers.Number) and math.isnan(val):
                del props[key]
            else:
                props[key] = str(val)
        accessions.append(Accession(name, files=None, **props))
    self.add_accessions(accessions)
def test_add_files():
    """Smoke test: add_files accepts a list of paths on an Accession with no files."""
    accession = Accession("empty")
    # Mix of a './'-prefixed path and bare file names.
    paths = ["./test.txt", "test2.txt", "test3.txt"]
    accession.add_files(paths)
def test_add_relative_path():
    """Smoke test: add_file accepts a './'-relative path."""
    accession = Accession("empty")
    relative_path = "./test.txt"
    accession.add_file(relative_path)
def simpleAccession():
    """Return an Accession named "Sample1" with two files and type "sample"."""
    sample_files = ["file1.txt", "file2.txt"]
    accession = Accession("Sample1", files=sample_files, type="sample")
    return accession
def test_from_accessions():
    """Smoke test: Cohort.from_accessions accepts a name and a list of accessions."""
    sample_types = (
        ("Sample1", "WGS"),
        ("Sample2", "WGS"),
        ("Sample3", "CHIP"),
        ("Sample4", "CHIP"),
    )
    # Every accession gets its own fresh file list.
    members = [
        Accession(sample, files=["file1.txt", "file2.txt"], type=kind)
        for sample, kind in sample_types
    ]
    Cohort.from_accessions("TestCohort", members)
def test_add_files():
    '''Smoke test: add_files accepts a list of paths on an Accession with no files.'''
    accession = Accession('empty')
    # Mix of a './'-prefixed path and bare file names.
    paths = ['./test.txt', 'test2.txt', 'test3.txt']
    accession.add_files(paths)
def test_bare_accession():
    """An Accession built from only a name is an Accession and can be rendered."""
    accession = Accession("empty")
    assert isinstance(accession, Accession)
    # Both text renderings must complete without raising; results are unused.
    for render in (str, repr):
        render(accession)
def test_from_accessions():
    '''Smoke test: Cohort.from_accessions accepts a name and a list of accessions.'''
    a = Accession('Sample1', files=['file1.txt', 'file2.txt'], type='WGS')
    b = Accession('Sample2', files=['file1.txt', 'file2.txt'], type='WGS')
    c = Accession('Sample3', files=['file1.txt', 'file2.txt'], type='CHIP')
    d = Accession('Sample4', files=['file1.txt', 'file2.txt'], type='CHIP')
    # The returned cohort is not inspected, so it is not bound to a name.
    Cohort.from_accessions('TestCohort', [a, b, c, d])
def simpleAccession():
    '''Return an Accession named 'Sample1' with two files and type 'sample'.'''
    sample_files = ['file1.txt', 'file2.txt']
    accession = Accession('Sample1', files=sample_files, type='sample')
    return accession
def test_load_from_yaml():
    """Smoke test: Accession.from_yaml runs on the bundled YAML file."""
    # Relative path: resolved against the working directory of the test run.
    yaml_path = "data/test_accession.yaml"
    Accession.from_yaml(yaml_path)
def test_add_relative_path():
    '''Smoke test: add_file accepts a './'-relative path.'''
    accession = Accession('empty')
    relative_path = './test.txt'
    accession.add_file(relative_path)