Ejemplo n.º 1
0
def simpleCohort():
    '''
        Build and return a Cohort named 'TestCohort' holding four accessions.

        `delete` is called first with force=True -- presumably to clear a
        cohort left over from an earlier run (confirm against `delete`).
    '''
    delete('Cohort', 'TestCohort', force=True)
    # Two WGS samples followed by two CHIP samples, all listing the same files
    sample_specs = [
        ('Sample1', 'WGS'),
        ('Sample2', 'WGS'),
        ('Sample3', 'CHIP'),
        ('Sample4', 'CHIP'),
    ]
    accessions = [
        Accession(sample, files=['file1.txt', 'file2.txt'], type=kind)
        for sample, kind in sample_specs
    ]

    cohort = Cohort('TestCohort')
    for accession in accessions:
        cohort.add_accession(accession)
    return cohort
Ejemplo n.º 2
0
def simpleCohort():
    """Build and return a Cohort named "TestCohort" holding four accessions.

    ``delete`` is called first with ``force=True`` -- presumably to clear a
    cohort left over from an earlier run (confirm against ``delete``).
    """
    delete("Cohort", "TestCohort", force=True)
    # Two WGS samples followed by two CHIP samples, all listing the same files.
    sample_specs = [
        ("Sample1", "WGS"),
        ("Sample2", "WGS"),
        ("Sample3", "CHIP"),
        ("Sample4", "CHIP"),
    ]
    accessions = [
        Accession(sample, files=["file1.txt", "file2.txt"], type=kind)
        for sample, kind in sample_specs
    ]

    cohort = Cohort("TestCohort")
    for accession in accessions:
        cohort.add_accession(accession)
    return cohort
Ejemplo n.º 3
0
def simpleCohort():
    """Yield a Cohort named "TestCohort" holding four accessions, then delete it.

    ``FreezableAPI.delete`` is called once before the cohort is built and
    once more after the consumer resumes this generator, the second time
    using the dtype and name read from the cohort's ``m80`` attribute.
    """
    FreezableAPI.delete("Cohort", "TestCohort")
    # Two WGS samples followed by two CHIP samples, all listing the same files.
    sample_specs = [
        ("Sample1", "WGS"),
        ("Sample2", "WGS"),
        ("Sample3", "CHIP"),
        ("Sample4", "CHIP"),
    ]
    accessions = [
        Accession(sample, files=["file1.txt", "file2.txt"], type=kind)
        for sample, kind in sample_specs
    ]

    cohort = Cohort("TestCohort")
    for accession in accessions:
        cohort.add_accession(accession)
    yield cohort
    # Teardown: runs only when the generator is resumed after the yield.
    FreezableAPI.delete(cohort.m80.dtype, cohort.m80.name)
Ejemplo n.º 4
0
def test_add_accession(simpleCohort):
    '''Adding an accession that is absent from the cohort grows it by one.'''
    sample = Accession('Sample4', files=['file1.txt', 'file2.txt'], type='CHIP')
    # Drop the accession first so the add below inserts a new member
    if sample in simpleCohort:
        del simpleCohort[sample]
    size_before = len(simpleCohort)
    simpleCohort.add_accession(sample)
    expected_size = size_before + 1
    assert len(simpleCohort) == expected_size
Ejemplo n.º 5
0
def test_add_accession(simpleCohort):
    """Adding an accession that is absent from the cohort grows it by one."""
    sample = Accession("Sample4", files=["file1.txt", "file2.txt"], type="CHIP")
    # Drop the accession first so the add below inserts a new member.
    if sample in simpleCohort:
        del simpleCohort[sample]
    size_before = len(simpleCohort)
    simpleCohort.add_accession(sample)
    expected_size = size_before + 1
    assert len(simpleCohort) == expected_size
Ejemplo n.º 6
0
def test_delitem(simpleCohort):
    """Deleting an accession by its name shrinks the cohort by one."""
    sample = Accession(
        "TESTSAMPLE_IGNORE",
        files=["file1.txt", "file2.txt"],
        type="CHIP",
    )
    # Make sure the accession is present before measuring the length.
    if sample not in simpleCohort:
        simpleCohort.add_accession(sample)
    size_before = len(simpleCohort)
    del simpleCohort["TESTSAMPLE_IGNORE"]
    expected_size = size_before - 1
    assert len(simpleCohort) == expected_size
Ejemplo n.º 7
0
    def __getitem__(self, name):
        '''
            Look up an accession and rebuild it from the database.

            Parameters
            ----------
            name : object
                Whatever `self._get_AID` accepts. The original notes
                describe it as the name or alias of an Accession, an
                actual Accession, or the AID (the internal accession ID).

            Returns
            -------
            Accession
                Built from the stored name, the stored file urls and the
                stored metadata key/val pairs, plus an 'AID' metadata entry.
        '''
        AID = self._get_AID(name)
        params = (AID, )
        cursor = self._db.cursor()
        # Resolve the stored name for this AID (single-column row)
        name_row = cursor.execute('SELECT name FROM accessions WHERE AID = ?',
                                  params).fetchone()
        acc_name, = name_row
        # Collect the key/val metadata pairs recorded for this AID
        metadata_rows = cursor.execute(
            '''
                SELECT key, val FROM metadata WHERE AID = ?;
                ''', params).fetchall()
        metadata = {}
        for key, val in metadata_rows:
            metadata[key] = val
        metadata['AID'] = AID
        # Collect the file urls recorded for this AID
        file_rows = cursor.execute(
            '''
                SELECT url FROM files WHERE AID = ?;
            ''', params).fetchall()
        files = [row[0] for row in file_rows]
        return Accession(acc_name, files=files, **metadata)
Ejemplo n.º 8
0
def test_delitem(simpleCohort):
    '''Deleting an accession by its name shrinks the cohort by one.'''
    sample_files = ['file1.txt', 'file2.txt']
    sample = Accession('TESTSAMPLE_IGNORE', files=sample_files, type='CHIP')
    # Make sure the accession is present before measuring the length
    if sample not in simpleCohort:
        simpleCohort.add_accession(sample)
    size_before = len(simpleCohort)
    del simpleCohort['TESTSAMPLE_IGNORE']
    expected_size = size_before - 1
    assert len(simpleCohort) == expected_size
Ejemplo n.º 9
0
def RNAAccession2():
    '''Return an Accession named 'RNAAccession2' of type 'RNASeq' with four fastq paths.'''
    # R1/R2 files for L005 and L006, in that order
    fastq_paths = [
        './data/Sample2_ATGTCA_L005_R1_001.fastq',
        './data/Sample2_ATGTCA_L005_R2_001.fastq',
        './data/Sample2_ATGTCA_L006_R1_001.fastq',
        './data/Sample2_ATGTCA_L006_R2_001.fastq',
    ]
    return Accession('RNAAccession2', files=fastq_paths, type='RNASeq')
Ejemplo n.º 10
0
def RNAAccession1():
    '''Return an Accession named 'RNAAccession1' of type 'RNASeq' with four fastq paths.'''
    # R1/R2 files for L007 and L008, in that order
    fastq_paths = [
        './data/Sample1_ATGTCA_L007_R1_001.fastq',
        './data/Sample1_ATGTCA_L007_R2_001.fastq',
        './data/Sample1_ATGTCA_L008_R1_001.fastq',
        './data/Sample1_ATGTCA_L008_R2_001.fastq',
    ]
    return Accession('RNAAccession1', files=fastq_paths, type='RNASeq')
Ejemplo n.º 11
0
def RNAAccession2():
    """Return an Accession named "RNAAccession2" of type "RNASeq" with four fastq paths."""
    # R1/R2 files for L005 and L006, in that order.
    fastq_paths = [
        "./data/Sample2_ATGTCA_L005_R1_001.fastq",
        "./data/Sample2_ATGTCA_L005_R2_001.fastq",
        "./data/Sample2_ATGTCA_L006_R1_001.fastq",
        "./data/Sample2_ATGTCA_L006_R2_001.fastq",
    ]
    return Accession("RNAAccession2", files=fastq_paths, type="RNASeq")
Ejemplo n.º 12
0
def RNAAccession1():
    """Return an Accession named "RNAAccession1" of type "RNASeq" with four fastq paths."""
    # R1/R2 files for L007 and L008, in that order.
    fastq_paths = [
        "./data/Sample1_ATGTCA_L007_R1_001.fastq",
        "./data/Sample1_ATGTCA_L007_R2_001.fastq",
        "./data/Sample1_ATGTCA_L008_R1_001.fastq",
        "./data/Sample1_ATGTCA_L008_R2_001.fastq",
    ]
    return Accession("RNAAccession1", files=fastq_paths, type="RNASeq")
Ejemplo n.º 13
0
    def add_accessions_from_data_frame(self, df, name_col):
        '''
            Add accessions from a data frame. This assumes
            each row is an Accession and that the properties
            of the accession are stored in the columns.

            Rows whose `name_col` value is null are skipped. Within
            a row, numeric NaN values are dropped and every remaining
            value is converted to a string before being passed to
            `Accession` as a keyword argument. The accessions are
            created with `files=None` and handed to
            `self.add_accessions` in a single call.

            Parameters
            ----------
            df : pandas.DataFrame
                The pandas data frame containing one accession
                per row
            name_col : string
                The column containing the accession names

            Raises
            ------
            ValueError
                If `name_col` is not one of the columns of `df`.

            Example
            -------

            >>> df = pd.DataFrame(
            ...     [['S1', 23, 'O'],
            ...      ['S2', 30, 'O+']],
            ...     columns=['Name', 'Age', 'Type']
            ... )
            >>> m80.add_accessions_from_data_frame(df, 'Name')

            Would add two Accessions: S1 and S2 with Age and Type
            properties.

        '''
        if name_col not in df.columns:
            raise ValueError(f'{name_col} is not a valid column name')
        # Filter out rows with a null name_col value
        # (the tilde operator is a boolean inversion)
        df = df.loc[~df[name_col].isnull(), :]
        accessions = []
        # Iterate over the rows and create an accession from each one
        for _, row in df.iterrows():
            d = dict(row)
            name = d.pop(name_col)
            # Get rid of missing data (numeric NaN) and stringify the rest
            properties = {
                k: str(v)
                for k, v in d.items()
                if not (isinstance(v, numbers.Number) and math.isnan(v))
            }
            accessions.append(Accession(name, files=None, **properties))
        self.add_accessions(accessions)
Ejemplo n.º 14
0
def test_add_files():
    """Smoke test: add_files accepts a list of three paths on a name-only Accession."""
    accession = Accession("empty")
    paths = ["./test.txt", "test2.txt", "test3.txt"]
    accession.add_files(paths)
Ejemplo n.º 15
0
def test_add_relative_path():
    """Smoke test: add_file accepts a "./"-prefixed path on a name-only Accession."""
    accession = Accession("empty")
    relative_path = "./test.txt"
    accession.add_file(relative_path)
Ejemplo n.º 16
0
def simpleAccession():
    """Return an Accession named "Sample1" with two files and type "sample"."""
    sample_files = ["file1.txt", "file2.txt"]
    accession = Accession("Sample1", files=sample_files, type="sample")
    return accession
Ejemplo n.º 17
0
def test_from_accessions():
    """Smoke test: Cohort.from_accessions accepts a name and a list of four accessions."""
    # Two WGS samples followed by two CHIP samples, all listing the same files.
    sample_specs = [
        ("Sample1", "WGS"),
        ("Sample2", "WGS"),
        ("Sample3", "CHIP"),
        ("Sample4", "CHIP"),
    ]
    accessions = [
        Accession(sample, files=["file1.txt", "file2.txt"], type=kind)
        for sample, kind in sample_specs
    ]
    Cohort.from_accessions("TestCohort", accessions)
Ejemplo n.º 18
0
def test_add_files():
    '''Smoke test: add_files accepts a list of three paths on a name-only Accession.'''
    accession = Accession('empty')
    paths = ['./test.txt', 'test2.txt', 'test3.txt']
    accession.add_files(paths)
Ejemplo n.º 19
0
def test_bare_accession():
    """An Accession built from a name alone is an Accession and supports str() and repr()."""
    bare = Accession("empty")
    assert isinstance(bare, Accession)
    # The results are discarded; the calls only need to complete without raising.
    str(bare)
    repr(bare)
Ejemplo n.º 20
0
def test_from_accessions():
    '''Smoke test: Cohort.from_accessions accepts a name and a list of four accessions.'''
    a = Accession('Sample1', files=['file1.txt', 'file2.txt'], type='WGS')
    b = Accession('Sample2', files=['file1.txt', 'file2.txt'], type='WGS')
    c = Accession('Sample3', files=['file1.txt', 'file2.txt'], type='CHIP')
    d = Accession('Sample4', files=['file1.txt', 'file2.txt'], type='CHIP')
    # The returned cohort is not inspected, so it is not bound to a name
    Cohort.from_accessions('TestCohort', [a, b, c, d])
Ejemplo n.º 21
0
def simpleAccession():
    '''Return an Accession named 'Sample1' with two files and type 'sample'.'''
    sample_files = ['file1.txt', 'file2.txt']
    accession = Accession('Sample1', files=sample_files, type='sample')
    return accession
Ejemplo n.º 22
0
def test_load_from_yaml():
    """Smoke test: Accession.from_yaml is called on the test accession YAML path."""
    yaml_path = "data/test_accession.yaml"
    Accession.from_yaml(yaml_path)
Ejemplo n.º 23
0
def test_add_relative_path():
    '''Smoke test: add_file accepts a './'-prefixed path on a name-only Accession.'''
    accession = Accession('empty')
    relative_path = './test.txt'
    accession.add_file(relative_path)