Esempio n. 1
0
    def setUp(self):
        """Build an eight-sample schema with 'treated' and 'sex' factors.

        The finished schema is stored on ``self.schema``.
        """
        sample_ids = range(1, 9)

        # One feature-id column followed by one column per sample.
        column_names = ["gene_id"] + ["sample" + str(n) for n in sample_ids]
        column_roles = ['feature_id'] + ['sample'] * len(sample_ids)

        schema = Schema(column_names=column_names, column_roles=column_roles)

        schema.add_factor('treated', [False, True])
        schema.add_factor('sex', ['male', 'female'])

        # (sample name, sex, treated) for every sample column.
        assignments = (
            ('sample1', 'male', False),
            ('sample2', 'male', False),
            ('sample3', 'female', False),
            ('sample4', 'female', False),
            ('sample5', 'male', True),
            ('sample6', 'male', True),
            ('sample7', 'female', True),
            ('sample8', 'female', True),
        )

        for sample_name, sex_value, treated_value in assignments:
            schema.set_factor(sample_name, 'sex', sex_value)
            schema.set_factor(sample_name, 'treated', treated_value)

        self.schema = schema
Esempio n. 2
0
File: newjob.py Progetto: itmat/pade
    def schema(self):
        """Build a Schema from the current workflow and the submitted form.

        Column names and roles come from ``current_workflow()``; factors and
        their allowed values come from ``self.factor_values``. The value of
        each factor for each sample column is read from
        ``self.column_label_form.assignments`` in sample-major order (all
        factors for the first sample column, then all factors for the next
        one, and so on).

        Raises:
            Exception: if the workflow has no column names or roles, or if
                there are fewer assignments than (sample column, factor)
                pairs.
        """
        wf = current_workflow()
        field_names = wf.field_names
        column_roles = wf.column_roles

        # Validate *before* converting to numpy: np.array(None) is a 0-d
        # array, which is never None and raises TypeError on len(), so a
        # check made after the conversion cannot report the intended error.
        if (field_names is None or column_roles is None
                or len(field_names) == 0 or len(column_roles) == 0):
            raise Exception("I can't create a schema without columns or roles")

        columns = np.array(field_names)
        roles = np.array(column_roles)
        factors = list(self.factor_values.keys())

        # Real lists rather than map(): on Python 3 map() returns a one-shot
        # iterator.
        schema = Schema([str(c) for c in columns],
                        [str(r) for r in column_roles])

        for factor, values in self.factor_values.items():
            schema.add_factor(str(factor), [str(v) for v in values])

        # Running index into the flat list of form assignments.
        counter = 0

        for c in columns[roles == 'sample']:
            for f in factors:
                try:
                    value = self.column_label_form.assignments[counter].data
                except IndexError:
                    raise Exception("No assignment " + str(counter))
                schema.set_factor(str(c), str(f), str(value))
                counter += 1

        return schema
Esempio n. 3
0
    def setUp(self):
        """Create ``self.schema``: eight samples labelled by sex and treatment."""
        numbers = range(1, 9)

        names = ["gene_id"]
        names.extend("sample" + str(number) for number in numbers)

        # First column identifies the feature; all others are samples.
        kinds = ['feature_id'] + ['sample'] * len(numbers)

        built = Schema(column_names=names, column_roles=kinds)

        built.add_factor('treated', [False, True])
        built.add_factor('sex', ['male', 'female'])

        # Rows are (sample name, sex, treated).
        rows = [
            ('sample1', 'male', False),
            ('sample2', 'male', False),
            ('sample3', 'female', False),
            ('sample4', 'female', False),
            ('sample5', 'male', True),
            ('sample6', 'male', True),
            ('sample7', 'female', True),
            ('sample8', 'female', True),
        ]

        for sample, sex, treated in rows:
            built.set_factor(sample, 'sex', sex)
            built.set_factor(sample, 'treated', treated)

        self.schema = built
Esempio n. 4
0
    def test_jobs(self):
        """Jobs can be added, fetched by id, listed, and found by schema or raw file."""

        # Set up the raw file
        raw_file_meta = self.mdb.add_input_file(name="test.txt",
                                                description="Some comments",
                                                stream=StringIO("a\nb\nc\n"))

        schema = Schema()
        schema.add_factor('treated', [False, True])
        schema.set_columns(['id', 'a', 'b'],
                           ['feature_id', 'sample', 'sample'])
        schema.set_factor('a', 'treated', False)
        schema.set_factor('b', 'treated', True)

        schema_meta = self.mdb.add_schema("First one", "The first one", schema,
                                          raw_file_meta)

        a = self.mdb.add_job(name="job1",
                             description="Some job",
                             raw_file_meta=raw_file_meta,
                             schema_meta=schema_meta)
        b = self.mdb.add_job(name="job2",
                             description="Other job",
                             raw_file_meta=raw_file_meta,
                             schema_meta=schema_meta)

        # Make sure it returned the object appropriately.
        # (assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists on Python 3.12+.)
        self.assertEqual(a.name, "job1")
        self.assertEqual(a.description, "Some job")

        # Re-fetch the job by id and check what comes back.
        a = self.mdb.job(a.obj_id)
        self.assertEqual(a.raw_file_id, raw_file_meta.obj_id)
        self.assertEqual(a.schema_id, schema_meta.obj_id)
        self.assertFalse(a.imported)

        # Make sure we can list all jobs
        jobs = self.mdb.all_jobs()
        self.assertEqual(len(jobs), 2)
        names = set(['job1', 'job2'])
        self.assertEqual(names, set([x.name for x in jobs]))

        job_ids = self.mdb.jobs_for_schema(schema_meta.obj_id)
        self.assertIn(a.obj_id, job_ids)
        self.assertIn(b.obj_id, job_ids)

        job_ids = self.mdb.jobs_for_raw_file(raw_file_meta.obj_id)
        self.assertIn(a.obj_id, job_ids)
        self.assertIn(b.obj_id, job_ids)
Esempio n. 5
0
    def setup_paired_schema(self):
        """Create ``self.paired_schema``.

        The schema has an 'id' feature column and one sample column for each
        combination of person ('a', 'b', 'c') and treated ('y', 'n').
        """
        persons = 'abc'
        treateds = 'yn'

        names = ['id', 'ay', 'an', 'by', 'bn', 'cy', 'cn']
        roles = ['feature_id'] + ['sample'] * 6

        self.paired_schema = schema = Schema(names, roles)

        schema.add_factor('person', list(persons))
        schema.add_factor('treated', list(treateds))

        # Column names are the person letter followed by the treated letter.
        for person in persons:
            for treated in treateds:
                column = person + treated
                schema.set_factor(column, 'person', person)
                schema.set_factor(column, 'treated', treated)
Esempio n. 6
0
    def setup_three_cond_schema(self):
        """Create ``self.three_cond_schema``.

        Columns are an 'id' feature column plus one sample column for every
        combination of gender ('m', 'f'), dosage ('l', 'm', 'h') and
        replicate number ('0'..'3'). Only gender and dosage are registered
        as factors.
        """
        genders = 'mf'
        dosages = 'lmh'
        replicate_ids = [str(n) for n in range(4)]

        combos = list(product(genders, dosages, replicate_ids))

        # A sample column is named by concatenating its three labels.
        sample_names = ["".join(combo) for combo in combos]
        col_names = ['id'] + sample_names
        col_roles = ['feature_id'] + ['sample'] * len(combos)

        self.three_cond_schema = schema = Schema(col_names, col_roles)

        schema.add_factor('gender', list(genders))
        schema.add_factor('dosage', list(dosages))

        for sample_name, (gender, dosage, _rep) in zip(sample_names, combos):
            schema.set_factor(sample_name, 'gender', gender)
            schema.set_factor(sample_name, 'dosage', dosage)
Esempio n. 7
0
def init_schema(infile=None):
    """Return a new Schema whose columns are taken from ``infile``.

    ``infile`` is read as tab-delimited text; its header fields become the
    column names. The first column is given the 'feature_id' role and every
    other column the 'sample' role.

    Nothing is saved and the file system is not modified.

    """
    reader = csv.DictReader(infile, delimiter="\t")
    header = reader.fieldnames

    # Start with every column as a sample, then mark the first as the id.
    column_roles = ['sample'] * len(header)
    column_roles[0] = 'feature_id'

    return Schema(column_names=header, column_roles=column_roles)
Esempio n. 8
0
    def schema(self):
        """Assemble a Schema from the current workflow and the label form.

        Column names and roles are taken from ``current_workflow()``, factors
        and their allowed values from ``self.factor_values``, and the
        per-column factor values from
        ``self.column_label_form.assignments``. Assignments are consumed in
        order: every factor for the first sample column, then every factor
        for the second sample column, and so on.

        Raises:
            Exception: if the workflow has no column names or roles, or if
                the form holds fewer assignments than there are
                (sample column, factor) pairs.
        """
        wf = current_workflow()
        raw_columns = wf.field_names
        raw_roles = wf.column_roles

        # The emptiness check has to look at the raw values. Once wrapped in
        # np.array(), a None becomes a 0-d array: it is not None, and calling
        # len() on it raises TypeError instead of the error below.
        missing = raw_columns is None or raw_roles is None
        if missing or len(raw_columns) == 0 or len(raw_roles) == 0:
            raise Exception("I can't create a schema without columns or roles")

        columns = np.array(raw_columns)
        roles = np.array(raw_roles)
        factors = list(self.factor_values.keys())

        # Build real lists; map() is a one-shot iterator on Python 3.
        schema = Schema([str(name) for name in columns],
                        [str(role) for role in raw_roles])

        for factor, values in self.factor_values.items():
            schema.add_factor(str(factor), [str(value) for value in values])

        # Position in the flat assignments list.
        counter = 0

        for c in columns[roles == 'sample']:
            for f in factors:
                try:
                    value = self.column_label_form.assignments[counter].data
                except IndexError:
                    raise Exception("No assignment " + str(counter))
                schema.set_factor(str(c), str(f), str(value))
                counter += 1

        return schema
Esempio n. 9
0
    def test_yaml(self):
        """Saving, loading and re-saving the schema gives identical output."""
        self.maxDiff = None
        # Save the schema, load it, and save it again. Compare the two
        # versions to make sure they're the same, so that we know we
        # can round-trip.
        out = StringIO()

        self.schema.save(out)
        loaded = Schema.load(out.getvalue())

        out2 = StringIO()
        loaded.save(out2)

        # assertEqual: the assertEquals alias is deprecated and is gone on
        # Python 3.12+.
        self.assertEqual(out.getvalue(), out2.getvalue())
Esempio n. 10
0
    def test_yaml(self):
        """Check that a saved schema survives a load/save round trip unchanged."""
        self.maxDiff = None
        # Save the schema, load it, and save it again. Compare the two
        # versions to make sure they're the same, so that we know we
        # can round-trip.
        first_dump = StringIO()
        self.schema.save(first_dump)

        reloaded = Schema.load(first_dump.getvalue())

        second_dump = StringIO()
        reloaded.save(second_dump)

        # Use assertEqual; the deprecated assertEquals alias was removed in
        # Python 3.12.
        self.assertEqual(first_dump.getvalue(), second_dump.getvalue())
Esempio n. 11
0
    def setup_paired_schema(self):
        """Build ``self.paired_schema``: persons a/b/c, each with a 'y' and an 'n' sample."""
        persons = 'abc'
        treateds = 'yn'

        column_names = ['id', 'ay', 'an', 'by', 'bn', 'cy', 'cn']
        column_roles = ['feature_id'] + ['sample'] * 6

        self.paired_schema = paired = Schema(column_names, column_roles)

        paired.add_factor('person', list(persons))
        paired.add_factor('treated', list(treateds))

        # Visit the sample columns person by person, 'y' before 'n'.
        for who in persons:
            for flag in treateds:
                paired.set_factor(who + flag, 'person', who)
                paired.set_factor(who + flag, 'treated', flag)
Esempio n. 12
0
    def test_jobs(self):
        """Add two jobs and check they can be fetched, listed and looked up."""

        # Set up the raw file
        raw_file_meta = self.mdb.add_input_file(
            name="test.txt", description="Some comments", stream=StringIO("a\nb\nc\n")
        )

        schema = Schema()
        schema.add_factor("treated", [False, True])
        schema.set_columns(["id", "a", "b"], ["feature_id", "sample", "sample"])
        schema.set_factor("a", "treated", False)
        schema.set_factor("b", "treated", True)

        schema_meta = self.mdb.add_schema("First one", "The first one", schema, raw_file_meta)

        a = self.mdb.add_job(name="job1", description="Some job", raw_file_meta=raw_file_meta, schema_meta=schema_meta)
        b = self.mdb.add_job(name="job2", description="Other job", raw_file_meta=raw_file_meta, schema_meta=schema_meta)

        # Make sure it returned the object appropriately.
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(a.name, "job1")
        self.assertEqual(a.description, "Some job")

        # Fetch the job again by id and check its links.
        a = self.mdb.job(a.obj_id)
        self.assertEqual(a.raw_file_id, raw_file_meta.obj_id)
        self.assertEqual(a.schema_id, schema_meta.obj_id)
        self.assertFalse(a.imported)

        # Make sure we can list all jobs
        jobs = self.mdb.all_jobs()
        self.assertEqual(len(jobs), 2)
        names = set(["job1", "job2"])
        self.assertEqual(names, set([x.name for x in jobs]))

        job_ids = self.mdb.jobs_for_schema(schema_meta.obj_id)
        self.assertIn(a.obj_id, job_ids)
        self.assertIn(b.obj_id, job_ids)

        job_ids = self.mdb.jobs_for_raw_file(raw_file_meta.obj_id)
        self.assertIn(a.obj_id, job_ids)
        self.assertIn(b.obj_id, job_ids)
Esempio n. 13
0
    def setup_three_cond_schema(self):
        """Build ``self.three_cond_schema``.

        There is one sample column per (gender, dosage, replicate) triple,
        named by joining the three labels, plus a leading 'id' feature
        column. 'gender' and 'dosage' are the only factors added.
        """
        genders = 'mf'
        dosages = 'lmh'
        reps = [str(k) for k in range(4)]

        triples = list(product(genders, dosages, reps))

        col_names = ['id']
        col_names.extend("".join(triple) for triple in triples)
        col_roles = ['feature_id'] + ['sample'] * len(triples)

        self.three_cond_schema = three_cond = Schema(col_names, col_roles)

        three_cond.add_factor('gender', list(genders))
        three_cond.add_factor('dosage', list(dosages))

        for gender, dosage, rep in triples:
            column = "".join((gender, dosage, rep))
            three_cond.set_factor(column, 'gender', gender)
            three_cond.set_factor(column, 'dosage', dosage)
Esempio n. 14
0
File: tasks.py Progetto: itmat/pade
def load_schema(db):
    """Load the Schema stored as text in ``db.attrs['schema']``."""
    # Schema.load is handed a file-like wrapper around the stored string.
    return Schema.load(StringIO(db.attrs['schema']))
Esempio n. 15
0
class SettingValidationTest(unittest.TestCase):
    """Checks which statistic / layout combinations ``Job`` accepts."""

    def setUp(self):
        """Build the two schemas shared by the tests."""
        for build_fixture in (self.setup_paired_schema,
                              self.setup_three_cond_schema):
            build_fixture()

    def setup_paired_schema(self):
        """Create ``self.paired_schema``: persons a/b/c, each treated 'y' and 'n'."""
        persons = 'abc'
        treateds = 'yn'

        names = ['id', 'ay', 'an', 'by', 'bn', 'cy', 'cn']
        roles = ['feature_id'] + ['sample'] * 6

        self.paired_schema = schema = Schema(names, roles)

        schema.add_factor('person', list(persons))
        schema.add_factor('treated', list(treateds))

        for person in persons:
            for treated in treateds:
                column = person + treated
                schema.set_factor(column, 'person', person)
                schema.set_factor(column, 'treated', treated)

    def setup_three_cond_schema(self):
        """Create ``self.three_cond_schema``.

        One sample column exists for each (gender, dosage, replicate)
        combination; only 'gender' and 'dosage' are added as factors.
        """
        genders = 'mf'
        dosages = 'lmh'
        replicate_ids = [str(n) for n in range(4)]

        combos = list(product(genders, dosages, replicate_ids))

        sample_names = ["".join(combo) for combo in combos]
        col_names = ['id'] + sample_names
        col_roles = ['feature_id'] + ['sample'] * len(combos)

        self.three_cond_schema = schema = Schema(col_names, col_roles)

        schema.add_factor('gender', list(genders))
        schema.add_factor('dosage', list(dosages))

        for sample_name, (gender, dosage, _rep) in zip(sample_names, combos):
            schema.set_factor(sample_name, 'gender', gender)
            schema.set_factor(sample_name, 'dosage', dosage)

    def test_ftest_layouts(self):
        """Layouts accepted and rejected for the 'f' statistic."""

        # Ftest can't be used when we have groups with only 1 replicate
        with self.assertRaises(UnsupportedLayoutException):
            blocked = Settings(stat='f',
                               block_variables=['person'],
                               condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=blocked)

        # But it can be used if we take away blocking
        unblocked = Settings(stat='f', condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=unblocked)

        # We can use F with three conditions, without blocking...
        by_dosage = Settings(stat='f', condition_variables=['dosage'])
        Job(schema=self.three_cond_schema, settings=by_dosage)

        # ... and with blocking
        by_dosage_blocked = Settings(stat='f',
                                     block_variables=['gender'],
                                     condition_variables=['dosage'])
        Job(schema=self.three_cond_schema, settings=by_dosage_blocked)

    def test_one_sample_ttest_layouts(self):
        """Layouts accepted and rejected for the 't' statistic."""

        # We can use one-sample t with a paired layout, where we have
        # 1 condition factor with 2 values, 1 blocking factors with n
        # values, and exactly 1 replicate for each combination of
        # condition and block.
        paired = Settings(stat='t',
                          equalize_means=False,
                          block_variables=['person'],
                          condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=paired)

        # If we take away the blocking factor it becomes invalid
        with self.assertRaisesRegexp(UnsupportedLayoutException, '.*pair.*'):
            no_block = Settings(equalize_means=False,
                                stat='t',
                                condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=no_block)

        # Turning on equalize_means is rejected as an invalid setting.
        with self.assertRaisesRegexp(InvalidSettingsException,
                                     '.*equalize means.*'):
            equalized = Settings(stat='t',
                                 equalize_means=True,
                                 block_variables=['person'],
                                 condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=equalized)

    def test_means_ratio_layouts(self):
        """Layouts accepted and rejected for the 'means_ratio' statistic."""

        # We can use means ratio as long as we have only two conditions
        two_cond = Settings(equalize_means=False,
                            stat='means_ratio',
                            condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=two_cond)

        two_cond_blocked = Settings(equalize_means=False,
                                    stat='means_ratio',
                                    block_variables=['person'],
                                    condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=two_cond_blocked)

        # We can't use means ratio if there are three conditions
        with self.assertRaises(UnsupportedLayoutException):
            three_cond = Settings(equalize_means=False,
                                  stat='means_ratio',
                                  condition_variables=['dosage'])
            Job(schema=self.three_cond_schema, settings=three_cond)

        with self.assertRaises(UnsupportedLayoutException):
            three_cond_blocked = Settings(equalize_means=False,
                                          stat='means_ratio',
                                          block_variables=['gender'],
                                          condition_variables=['dosage'])
            Job(schema=self.three_cond_schema, settings=three_cond_blocked)

        # equalize_means=True is rejected for means_ratio.
        with self.assertRaises(InvalidSettingsException):
            equalized = Settings(equalize_means=True,
                                 stat='means_ratio',
                                 block_variables=['person'],
                                 condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=equalized)

    def test_unknown_statistic(self):
        """An unrecognised ``stat`` name raises UnknownStatisticException."""
        with self.assertRaises(UnknownStatisticException):
            bogus = Settings(stat='BadStat',
                             condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=bogus)
Esempio n. 16
0
def load_schema(path):
    """Load a Schema from the file at ``path``.

    Raises:
        UsageException: if an IOError occurs while opening or loading the
            file.
    """
    try:
        with open(path) as f:
            return Schema.load(f)
    except IOError as e:
        # e.filename and e.strerror may be None (for example an IOError
        # raised with only a message). Concatenating None to a str would
        # raise TypeError and mask the real failure, so fall back to the
        # requested path and the exception's own text.
        filename = e.filename if e.filename is not None else path
        reason = e.strerror if e.strerror is not None else e
        raise UsageException(
            "Couldn't load schema: {0}: {1}".format(filename, reason))
Esempio n. 17
0
    def setUp(self):
        """Build a twelve-sample schema with 'age', 'sex' and 'treated' factors.

        Samples are numbered from 1 in nested order: sex (outermost), then
        age, then treated (innermost, True before False). The schema is
        stored on ``self.schema``.
        """
        sample_nums = range(1, 13)

        colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
        roles = ['feature_id'] + ['sample'] * len(sample_nums)

        schema = Schema(column_names=colnames, column_roles=roles)

        schema.add_factor('age', [2, 20, 55])
        schema.add_factor('sex', ['male', 'female'])
        schema.add_factor('treated', [False, True])

        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(schema.sample_num("sample1"), 0)
        self.assertEqual(schema.sample_num("sample7"), 6)

        counter = 0
        for sex in ['male', 'female']:
            for age in [2, 20, 55]:
                for treated in [True, False]:
                    counter += 1
                    name = "sample" + str(counter)
                    schema.set_factor(name, 'sex', sex)
                    schema.set_factor(name, 'age', age)
                    schema.set_factor(name, 'treated', treated)
        self.schema = schema
Esempio n. 18
0
    def test_model_dummy_vars_1(self):
        """``dummy_vars`` on 'age' and 'treated' at level 2 gives the expected table."""

        sample_nums = range(1, 13)

        colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
        roles = ['feature_id'] + ['sample'] * len(sample_nums)

        schema = Schema(
            column_names=colnames,
            column_roles=roles)

        schema.add_factor('age', [2, 20, 55])
        schema.add_factor('sex', ['male', 'female'])
        schema.add_factor('treated', [False, True])

        # assertEqual replaces the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(schema.sample_num("sample1"), 0)
        self.assertEqual(schema.sample_num("sample7"), 6)

        # Samples are numbered in nested order: sex, then age, then treated
        # (True before False).
        counter = 0
        for sex in ['male', 'female']:
            for age in [2, 20, 55]:
                for treated in [True, False]:
                    counter += 1
                    name = "sample" + str(counter)
                    schema.set_factor(name, 'sex', sex)
                    schema.set_factor(name, 'age', age)
                    schema.set_factor(name, 'treated', treated)

        dummies = dummy_vars(schema, ['age', 'treated'], level=2)

        expected = DummyVarTable(
            ({},
             {'age': 20},
             {'age': 55},
             {'treated': True},
             {'age': 20, 'treated': True},
             {'age': 55, 'treated': True}),
            [
                DummyVarAssignment(
                    factor_values=(2, False),
                    bits=(True, False, False, False, False, False),
                    indexes=['sample2', 'sample8']),
                DummyVarAssignment(
                    factor_values=(2, True),
                    bits=(True, False, False, True, False, False),
                    indexes=['sample1', 'sample7']),
                DummyVarAssignment(
                    factor_values=(20, False),
                    bits=(True, True, False, False, False, False),
                    indexes=['sample4', 'sample10']),
                DummyVarAssignment(
                    factor_values=(20, True),
                    bits=(True, True, False, True, True, False),
                    indexes=['sample3', 'sample9']),
                DummyVarAssignment(
                    factor_values=(55, False),
                    bits=(True, False, True, False, False, False),
                    indexes=['sample6', 'sample12']),
                DummyVarAssignment(
                    factor_values=(55, True),
                    bits=(True, False, True, True, False, True),
                    indexes=['sample5', 'sample11'])])

        self.assertEqual(dummies, expected)
Esempio n. 19
0
    def test_ignore_columns(self):
        """Columns whose role is None are left out of the schema's samples.

        Of eight data columns only the even-numbered ones get the 'sample'
        role; assignments, index lookups and a save/load round trip are then
        checked against those four columns.
        """

        names = ["gene_id"]
        roles = ['feature_id']
        for i in range(8):
            names.append('sample_' + str(i))
            if (i % 2) == 0:
                roles.append('sample')
            else:
                roles.append(None)

        schema = Schema(column_names=names, column_roles=roles)
        # assertEqual throughout: the assertEquals alias is deprecated and
        # was removed in Python 3.12.
        self.assertEqual(len(schema.sample_column_names), 4)

        schema.add_factor('treated', [False, True])

        schema.set_factor('sample_0', 'treated', False)
        schema.set_factor('sample_2', 'treated', False)
        schema.set_factor('sample_4', 'treated', True)
        schema.set_factor('sample_6', 'treated', True)

        # NOTE(review): `i` is left over from the loop above (it is 7 here),
        # so this targets 'sample_17', a name that was never added, rather
        # than an ignored column such as 'sample_1'. Presumably 'sample_1'
        # was intended -- confirm against Schema.set_factor before changing.
        with self.assertRaises(Exception):
            schema.set_factor('sample_1' + str(i), 'treated', True)

        self.assertEqual(schema.possible_assignments(['treated']), [
            OrderedDict([('treated', False)]),
            OrderedDict([('treated', True)])
        ])

        self.assertEqual(
            schema.indexes_with_assignments(OrderedDict([('treated', False)])),
            [0, 1])

        self.assertEqual(
            schema.indexes_with_assignments(OrderedDict([('treated', True)])),
            [2, 3])

        self.assertEqual(
            schema.samples_with_assignments(OrderedDict([('treated', False)])),
            ['sample_0', 'sample_2'])

        self.assertEqual(
            schema.samples_with_assignments(OrderedDict([('treated', True)])),
            ['sample_4', 'sample_6'])

        # Save, reload and save again; both dumps must match.
        out = StringIO()

        schema.save(out)
        loaded = Schema.load(out.getvalue())

        out2 = StringIO()
        loaded.save(out2)

        self.maxDiff = None
        self.assertEqual(out.getvalue(), out2.getvalue())
Esempio n. 20
0
    def setUp(self):
        """Create ``self.schema``: twelve samples labelled by sex, age and treatment.

        Sample numbers run from 1 in nested order -- sex outermost, then
        age, then treated (True before False).
        """
        sample_nums = range(1, 13)

        colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
        roles = ['feature_id'] + ['sample'] * len(sample_nums)

        schema = Schema(
            column_names=colnames,
            column_roles=roles)

        schema.add_factor('age', [2, 20, 55])
        schema.add_factor('sex', ['male', 'female'])
        schema.add_factor('treated', [False, True])

        # Use assertEqual; the deprecated assertEquals alias no longer
        # exists on Python 3.12+.
        self.assertEqual(schema.sample_num("sample1"), 0)
        self.assertEqual(schema.sample_num("sample7"), 6)

        counter = 0
        for sex in ['male', 'female']:
            for age in [2, 20, 55]:
                for treated in [True, False]:
                    counter += 1
                    name = "sample" + str(counter)
                    schema.set_factor(name, 'sex', sex)
                    schema.set_factor(name, 'age', age)
                    schema.set_factor(name, 'treated', treated)
        self.schema = schema
Esempio n. 21
0
def load_schema(db):
    """Return the Schema parsed from the text held in ``db.attrs['schema']``."""
    serialized = db.attrs['schema']
    # Wrap the text in a file-like object before handing it to Schema.load.
    stream = StringIO(serialized)
    return Schema.load(stream)
Esempio n. 22
0
File: metadb.py Progetto: itmat/pade
 def load(self):
     """Open ``self.path`` and return the Schema loaded from it."""
     with open(self.path) as schema_file:
         loaded = Schema.load(schema_file)
     return loaded
Esempio n. 23
0
    def test_ignore_columns(self):
        """Only columns with the 'sample' role count as samples.

        Odd-numbered data columns get a role of None; the test checks that
        the four remaining sample columns drive assignments and lookups, and
        that the schema survives a save/load round trip.
        """

        names = ["gene_id"]
        roles = ['feature_id']
        for i in range(8):
            names.append('sample_' + str(i))
            if (i % 2) == 0:
                roles.append('sample')
            else:
                roles.append(None)

        schema = Schema(
            column_names=names,
            column_roles=roles)
        # assertEqual is used below because the assertEquals alias is
        # deprecated and was removed in Python 3.12.
        self.assertEqual(len(schema.sample_column_names), 4)

        schema.add_factor('treated', [False, True])

        schema.set_factor('sample_0', 'treated', False)
        schema.set_factor('sample_2', 'treated', False)
        schema.set_factor('sample_4', 'treated', True)
        schema.set_factor('sample_6', 'treated', True)

        # NOTE(review): `i` still holds 7 from the loop above, so the name
        # built here is 'sample_17' (never added) and not an ignored column
        # like 'sample_1'. That looks unintended -- confirm how
        # Schema.set_factor treats ignored columns before changing it.
        with self.assertRaises(Exception):
            schema.set_factor('sample_1' + str(i), 'treated', True)

        self.assertEqual(schema.possible_assignments(['treated']),
                         [OrderedDict([('treated', False)]),
                          OrderedDict([('treated', True)])])

        self.assertEqual(schema.indexes_with_assignments(
                OrderedDict([('treated', False)])),
                         [0, 1])

        self.assertEqual(schema.indexes_with_assignments(
                OrderedDict([('treated', True)])),
                         [2, 3])

        self.assertEqual(schema.samples_with_assignments(
                OrderedDict([('treated', False)])),
                         ['sample_0', 'sample_2'])

        self.assertEqual(schema.samples_with_assignments(
                OrderedDict([('treated', True)])),
                         ['sample_4', 'sample_6'])

        # Round trip: save, load, save again and compare the two dumps.
        out = StringIO()

        schema.save(out)
        loaded = Schema.load(out.getvalue())

        out2 = StringIO()
        loaded.save(out2)

        self.maxDiff = None
        self.assertEqual(out.getvalue(),
                         out2.getvalue())
Esempio n. 24
0
    def test_model_dummy_vars_1(self):
        """Compare ``dummy_vars(schema, ['age', 'treated'], level=2)`` with a hand-built table."""

        sample_nums = range(1, 13)

        colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
        roles = ['feature_id'] + ['sample'] * len(sample_nums)

        schema = Schema(column_names=colnames, column_roles=roles)

        schema.add_factor('age', [2, 20, 55])
        schema.add_factor('sex', ['male', 'female'])
        schema.add_factor('treated', [False, True])

        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(schema.sample_num("sample1"), 0)
        self.assertEqual(schema.sample_num("sample7"), 6)

        # Number the samples in nested order: sex, age, treated (True first).
        counter = 0
        for sex in ['male', 'female']:
            for age in [2, 20, 55]:
                for treated in [True, False]:
                    counter += 1
                    name = "sample" + str(counter)
                    schema.set_factor(name, 'sex', sex)
                    schema.set_factor(name, 'age', age)
                    schema.set_factor(name, 'treated', treated)

        dummies = dummy_vars(schema, ['age', 'treated'], level=2)

        expected_columns = (
            {},
            {'age': 20},
            {'age': 55},
            {'treated': True},
            {'age': 20, 'treated': True},
            {'age': 55, 'treated': True},
        )
        expected_rows = [
            DummyVarAssignment(factor_values=(2, False),
                               bits=(True, False, False, False, False, False),
                               indexes=['sample2', 'sample8']),
            DummyVarAssignment(factor_values=(2, True),
                               bits=(True, False, False, True, False, False),
                               indexes=['sample1', 'sample7']),
            DummyVarAssignment(factor_values=(20, False),
                               bits=(True, True, False, False, False, False),
                               indexes=['sample4', 'sample10']),
            DummyVarAssignment(factor_values=(20, True),
                               bits=(True, True, False, True, True, False),
                               indexes=['sample3', 'sample9']),
            DummyVarAssignment(factor_values=(55, False),
                               bits=(True, False, True, False, False, False),
                               indexes=['sample6', 'sample12']),
            DummyVarAssignment(factor_values=(55, True),
                               bits=(True, False, True, True, False, True),
                               indexes=['sample5', 'sample11']),
        ]
        expected = DummyVarTable(expected_columns, expected_rows)

        self.assertEqual(dummies, expected)
Esempio n. 25
0
 def load(self):
     """Return the Schema read from the file at ``self.path``."""
     with open(self.path) as handle:
         result = Schema.load(handle)
     return result
Esempio n. 26
0
    def test_schemas(self):
        """Schemas can be added, listed, loaded, and found by their input file."""
        rawfile = self.mdb.add_input_file(name="test.txt", description="Some comments", stream=StringIO("a\nb\nc\n"))

        schema_a = Schema()
        schema_a.add_factor("treated", [False, True])
        schema_a.set_columns(["id", "a", "b"], ["feature_id", "sample", "sample"])
        schema_a.set_factor("a", "treated", False)
        schema_a.set_factor("b", "treated", True)

        schema_b = Schema()
        schema_b.add_factor("age", ["young", "old"])
        schema_b.set_columns(["key", "foo", "bar"], ["feature_id", "sample", "sample"])
        schema_b.set_factor("foo", "age", "young")
        schema_b.set_factor("bar", "age", "old")

        a = self.mdb.add_schema("First one", "The first one", schema_a, rawfile)
        b = self.mdb.add_schema("Second", "Other", schema_b, rawfile)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(a.name, "First one")
        self.assertEqual(a.description, "The first one")

        schemas = self.mdb.all_schemas()
        self.assertEqual(len(schemas), 2)

        self.assertEqual(a.based_on_input_file_id, rawfile.obj_id)

        # Loading every stored schema must yield the columns of both.
        colnames = set()
        for s in schemas:
            schema = s.load()
            colnames.update(schema.column_names)
        self.assertEqual(colnames, set(["id", "a", "b", "key", "foo", "bar"]))

        schema_ids = self.mdb.schemas_based_on_input_file(a.based_on_input_file_id)
        self.assertIn(a.obj_id, schema_ids)
        self.assertIn(b.obj_id, schema_ids)
Esempio n. 27
0
class SettingValidationTest(unittest.TestCase):
    """Checks which statistic settings Job accepts for two sample layouts.

    Two schemas are rebuilt before every test:

    * ``paired_schema``: persons a, b, c crossed with treated y/n, with
      one sample column per combination.
    * ``three_cond_schema``: genders m, f crossed with dosages l, m, h,
      with four replicate columns per combination.
    """

    def setUp(self):
        self.setup_paired_schema()
        self.setup_three_cond_schema()

    def _make_job(self, schema, **settings):
        """Construct a Job for *schema* from the given Settings keywords."""
        return Job(schema=schema, settings=Settings(**settings))

    def setup_paired_schema(self):
        """Build ``self.paired_schema``: one sample per (person, treated)."""
        people = 'abc'
        treatments = 'yn'
        pairs = list(product(people, treatments))

        # Column names are person + treatment: ay, an, by, bn, cy, cn.
        names = ['id'] + [who + flag for who, flag in pairs]
        roles = ['feature_id'] + ['sample'] * len(pairs)

        schema = self.paired_schema = Schema(names, roles)
        schema.add_factor('person', list(people))
        schema.add_factor('treated', list(treatments))

        for who, flag in pairs:
            column = who + flag
            schema.set_factor(column, 'person', who)
            schema.set_factor(column, 'treated', flag)

    def setup_three_cond_schema(self):
        """Build ``self.three_cond_schema``: gender x dosage x 4 replicates."""
        sexes = 'mf'
        doses = 'lmh'
        replicates = [str(n) for n in range(4)]

        combos = list(product(sexes, doses, replicates))

        names = ['id'] + [sex + dose + rep for sex, dose, rep in combos]
        roles = ['feature_id'] + ['sample'] * len(combos)

        schema = self.three_cond_schema = Schema(names, roles)
        schema.add_factor('gender', list(sexes))
        schema.add_factor('dosage', list(doses))

        # The replicate number only distinguishes column names; it is not
        # registered as a factor.
        for sex, dose, rep in combos:
            column = "".join((sex, dose, rep))
            schema.set_factor(column, 'gender', sex)
            schema.set_factor(column, 'dosage', dose)

    def test_ftest_layouts(self):
        """F statistic: rejected with single-replicate groups, fine otherwise."""
        paired = self.paired_schema
        three_cond = self.three_cond_schema

        # Blocking the paired layout by person leaves groups with only one
        # replicate, which the F-test cannot handle.
        with self.assertRaises(UnsupportedLayoutException):
            self._make_job(paired,
                           stat='f',
                           block_variables=['person'],
                           condition_variables=['treated'])

        # Dropping the blocking variable makes the same schema usable.
        self._make_job(paired, stat='f', condition_variables=['treated'])

        # Three conditions work without blocking...
        self._make_job(three_cond, stat='f', condition_variables=['dosage'])

        # ... and with blocking.
        self._make_job(three_cond,
                       stat='f',
                       block_variables=['gender'],
                       condition_variables=['dosage'])

    def test_one_sample_ttest_layouts(self):
        """One-sample t: needs a paired layout and equalize_means off."""
        paired = self.paired_schema

        # Valid layout: one condition factor with 2 values, one blocking
        # factor with n values, and exactly one replicate for each
        # combination of condition and block.
        self._make_job(paired,
                       stat='t',
                       equalize_means=False,
                       block_variables=['person'],
                       condition_variables=['treated'])

        # Without the blocking factor the layout is no longer paired.
        # NOTE(review): assertRaisesRegexp is the Python 2.7 spelling; on
        # Python 3.2+ the method is named assertRaisesRegex.
        with self.assertRaisesRegexp(UnsupportedLayoutException, '.*pair.*'):
            self._make_job(paired,
                           equalize_means=False,
                           stat='t',
                           condition_variables=['treated'])

        # The same paired layout is refused once equalize_means is on.
        with self.assertRaisesRegexp(InvalidSettingsException,
                                     '.*equalize means.*'):
            self._make_job(paired,
                           stat='t',
                           equalize_means=True,
                           block_variables=['person'],
                           condition_variables=['treated'])

    def test_means_ratio_layouts(self):
        """Means ratio: exactly two conditions, and equalize_means off."""
        paired = self.paired_schema
        three_cond = self.three_cond_schema

        # Two conditions are accepted, with or without blocking.
        self._make_job(paired,
                       equalize_means=False,
                       stat='means_ratio',
                       condition_variables=['treated'])
        self._make_job(paired,
                       equalize_means=False,
                       stat='means_ratio',
                       block_variables=['person'],
                       condition_variables=['treated'])

        # Three conditions are rejected, with or without blocking.
        with self.assertRaises(UnsupportedLayoutException):
            self._make_job(three_cond,
                           equalize_means=False,
                           stat='means_ratio',
                           condition_variables=['dosage'])
        with self.assertRaises(UnsupportedLayoutException):
            self._make_job(three_cond,
                           equalize_means=False,
                           stat='means_ratio',
                           block_variables=['gender'],
                           condition_variables=['dosage'])

        # Turning equalize_means on is an invalid setting for this statistic.
        with self.assertRaises(InvalidSettingsException):
            self._make_job(paired,
                           equalize_means=True,
                           stat='means_ratio',
                           block_variables=['person'],
                           condition_variables=['treated'])

    def test_unknown_statistic(self):
        """An unrecognised statistic name raises UnknownStatisticException."""
        with self.assertRaises(UnknownStatisticException):
            self._make_job(self.paired_schema,
                           stat='BadStat',
                           condition_variables=['treated'])
Esempio n. 28
0
    def test_schemas(self):
        """Store two schemas for one input file and read them back.

        Checks the stored name and description, the total number of
        schemas, the link back to the input file, the column names
        obtained by loading each stored schema, and the lookup of
        schemas by input file id.
        """
        rawfile = self.mdb.add_input_file(name="test.txt",
                                          description="Some comments",
                                          stream=StringIO("a\nb\nc\n"))

        schema_a = Schema()
        schema_a.add_factor('treated', [False, True])
        schema_a.set_columns(['id', 'a', 'b'],
                             ['feature_id', 'sample', 'sample'])
        schema_a.set_factor('a', 'treated', False)
        schema_a.set_factor('b', 'treated', True)

        schema_b = Schema()
        schema_b.add_factor('age', ['young', 'old'])
        schema_b.set_columns(['key', 'foo', 'bar'],
                             ['feature_id', 'sample', 'sample'])
        schema_b.set_factor('foo', 'age', 'young')
        schema_b.set_factor('bar', 'age', 'old')

        a = self.mdb.add_schema("First one", "The first one", schema_a,
                                rawfile)
        b = self.mdb.add_schema("Second", "Other", schema_b, rawfile)

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python releases.
        self.assertEqual(a.name, "First one")
        self.assertEqual(a.description, "The first one")

        schemas = self.mdb.all_schemas()
        self.assertEqual(len(schemas), 2)

        self.assertEqual(a.based_on_input_file_id, rawfile.obj_id)

        # Loading every stored schema must yield the union of both column sets.
        colnames = set()
        for s in schemas:
            schema = s.load()
            colnames.update(schema.column_names)
        self.assertEqual(colnames, set(['id', 'a', 'b', 'key', 'foo', 'bar']))

        schema_ids = self.mdb.schemas_based_on_input_file(
            a.based_on_input_file_id)
        # assertIn reports both the member and the container on failure.
        self.assertIn(a.obj_id, schema_ids)
        self.assertIn(b.obj_id, schema_ids)