def setUp(self):
    """Build an eight-sample schema with 'treated' and 'sex' factors.

    The finished schema is stored on ``self.schema``.
    """
    sample_names = ["sample" + str(n) for n in range(1, 9)]
    schema = Schema(
        column_names=["gene_id"] + sample_names,
        column_roles=['feature_id'] + ['sample'] * len(sample_names))
    schema.add_factor('treated', [False, True])
    schema.add_factor('sex', ['male', 'female'])

    # One (column, sex, treated) entry per sample column.
    assignments = [
        ('sample1', 'male', False),
        ('sample2', 'male', False),
        ('sample3', 'female', False),
        ('sample4', 'female', False),
        ('sample5', 'male', True),
        ('sample6', 'male', True),
        ('sample7', 'female', True),
        ('sample8', 'female', True),
    ]
    for column, sex, treated in assignments:
        schema.set_factor(column, 'sex', sex)
        schema.set_factor(column, 'treated', treated)

    self.schema = schema
def schema(self):
    """Build a Schema from the current workflow and this object's form data.

    Column names and roles are read from ``current_workflow()``. Factors
    and their allowed values come from ``self.factor_values``. Entries of
    ``self.column_label_form.assignments`` are consumed in order, one per
    (sample column, factor) pair, with sample columns in the outer loop.

    Returns:
        The populated Schema.

    Raises:
        Exception: if the workflow has no columns or roles, or if there
            are fewer assignments than (sample column, factor) pairs.
    """
    wf = current_workflow()
    field_names = wf.field_names
    column_roles = wf.column_roles

    # Test for None before wrapping: np.array(None) is a 0-d array, never
    # None itself, and len() on it raises TypeError rather than the
    # message below.
    if field_names is None or column_roles is None:
        raise Exception("I can't create a schema without columns or roles")

    columns = np.array(field_names)
    roles = np.array(column_roles)
    if len(columns) == 0 or len(roles) == 0:
        raise Exception("I can't create a schema without columns or roles")

    # Pass concrete lists so the Schema never receives a one-shot iterator.
    schema = Schema([str(c) for c in columns],
                    [str(r) for r in column_roles])

    for factor, values in self.factor_values.items():
        schema.add_factor(str(factor), [str(v) for v in values])

    factors = list(self.factor_values.keys())
    counter = 0
    for column in columns[roles == 'sample']:
        for factor in factors:
            try:
                value = self.column_label_form.assignments[counter].data
            except IndexError:
                raise Exception("No assignment " + str(counter))
            schema.set_factor(str(column), str(factor), str(value))
            counter += 1

    return schema
def setUp(self):
    """Prepare ``self.schema``: eight sample columns labelled by sex and treatment."""
    sample_names = ["sample" + str(n) for n in range(1, 9)]
    column_names = ["gene_id"] + sample_names
    column_roles = ['feature_id'] + ['sample'] * len(sample_names)

    schema = Schema(column_names=column_names, column_roles=column_roles)
    schema.add_factor('treated', [False, True])
    schema.add_factor('sex', ['male', 'female'])

    # Samples 1-4 are untreated and 5-8 treated; within each half the
    # first two are male and the last two female.
    sexes = ['male', 'male', 'female', 'female'] * 2
    treatments = [False] * 4 + [True] * 4
    for column, sex, treated in zip(sample_names, sexes, treatments):
        schema.set_factor(column, 'sex', sex)
        schema.set_factor(column, 'treated', treated)

    self.schema = schema
def test_jobs(self):
    """Add two jobs over one raw file and schema, then check each lookup."""
    # Set up the raw file
    raw_file_meta = self.mdb.add_input_file(
        name="test.txt",
        description="Some comments",
        stream=StringIO("a\nb\nc\n"))

    schema = Schema()
    schema.add_factor('treated', [False, True])
    schema.set_columns(['id', 'a', 'b'], ['feature_id', 'sample', 'sample'])
    schema.set_factor('a', 'treated', False)
    schema.set_factor('b', 'treated', True)
    schema_meta = self.mdb.add_schema(
        "First one", "The first one", schema, raw_file_meta)

    a = self.mdb.add_job(
        name="job1", description="Some job",
        raw_file_meta=raw_file_meta, schema_meta=schema_meta)
    b = self.mdb.add_job(
        name="job2", description="Other job",
        raw_file_meta=raw_file_meta, schema_meta=schema_meta)

    # Make sure it returned the object appropriately
    self.assertEqual(a.name, "job1")
    self.assertEqual(a.description, "Some job")

    # Re-fetch the job by id; the checks below use the fetched object.
    a = self.mdb.job(a.obj_id)
    self.assertEqual(a.raw_file_id, raw_file_meta.obj_id)
    self.assertEqual(a.schema_id, schema_meta.obj_id)
    self.assertFalse(a.imported)

    # Make sure we can list all jobs
    jobs = self.mdb.all_jobs()
    self.assertEqual(len(jobs), 2)
    names = set(['job1', 'job2'])
    self.assertEqual(names, set([x.name for x in jobs]))

    # Both jobs are reachable through the schema and through the raw file.
    job_ids = self.mdb.jobs_for_schema(schema_meta.obj_id)
    self.assertIn(a.obj_id, job_ids)
    self.assertIn(b.obj_id, job_ids)

    job_ids = self.mdb.jobs_for_raw_file(raw_file_meta.obj_id)
    self.assertIn(a.obj_id, job_ids)
    self.assertIn(b.obj_id, job_ids)
def setup_paired_schema(self):
    """Create ``self.paired_schema`` with one sample column per (person, treated) pair.

    Persons are 'a', 'b', 'c' and treated values are 'y', 'n'; each column
    is named by concatenating the two.
    """
    people = 'abc'
    treatments = 'yn'

    schema = self.paired_schema = Schema(
        ['id', 'ay', 'an', 'by', 'bn', 'cy', 'cn'],
        ['feature_id'] + ['sample'] * 6)
    schema.add_factor('person', list(people))
    schema.add_factor('treated', list(treatments))

    pairs = [(person, treatment)
             for person in people
             for treatment in treatments]
    for person, treatment in pairs:
        column = person + treatment
        schema.set_factor(column, 'person', person)
        schema.set_factor(column, 'treated', treatment)
def setup_three_cond_schema(self):
    """Create ``self.three_cond_schema``: 2 genders x 3 dosages x 4 replicates.

    Each sample column is named gender + dosage + replicate number. Only
    the 'gender' and 'dosage' factors are recorded on the schema.
    """
    genders = 'mf'
    dosages = 'lmh'
    replicates = [str(n) for n in range(4)]
    combos = list(product(genders, dosages, replicates))

    schema = self.three_cond_schema = Schema(
        ['id'] + ["".join(combo) for combo in combos],
        ['feature_id'] + ['sample'] * len(combos))
    schema.add_factor('gender', list(genders))
    schema.add_factor('dosage', list(dosages))

    for gender, dosage, replicate in combos:
        column = gender + dosage + replicate
        schema.set_factor(column, 'gender', gender)
        schema.set_factor(column, 'dosage', dosage)
def init_schema(infile=None):
    """Create a new schema from the header of the tab-delimited ``infile``.

    The first column is given the 'feature_id' role and every other column
    the 'sample' role. The schema is not saved and nothing on the file
    system is changed.
    """
    reader = csv.DictReader(infile, delimiter="\t")
    header = reader.fieldnames

    # Start with every column as a sample, then mark the first as the id.
    roles = ['sample'] * len(header)
    roles[0] = 'feature_id'

    return Schema(column_names=header, column_roles=roles)
def schema(self):
    """Assemble a Schema from the current workflow and this object's form data.

    Column names and roles come from ``current_workflow()``, and factors
    with their allowed values from ``self.factor_values``. One entry of
    ``self.column_label_form.assignments`` is read, in order, for every
    (sample column, factor) pair; sample columns form the outer loop.

    Returns:
        The populated Schema.

    Raises:
        Exception: if the workflow has no columns or roles, or if the
            assignments run out before every pair has a value.
    """
    wf = current_workflow()
    field_names = wf.field_names
    column_roles = wf.column_roles

    # The None check must precede np.array(): np.array(None) yields a 0-d
    # array (never None), and len() of that raises TypeError instead of
    # reaching the message below.
    if field_names is None or column_roles is None:
        raise Exception("I can't create a schema without columns or roles")

    columns = np.array(field_names)
    roles = np.array(column_roles)
    if len(columns) == 0 or len(roles) == 0:
        raise Exception("I can't create a schema without columns or roles")

    # Hand over real lists, not map() results, so the arguments can be
    # iterated more than once.
    schema = Schema([str(name) for name in columns],
                    [str(role) for role in column_roles])

    for factor, values in self.factor_values.items():
        schema.add_factor(str(factor), [str(value) for value in values])

    factors = list(self.factor_values.keys())
    sample_columns = columns[roles == 'sample']

    counter = 0
    for column in sample_columns:
        for factor in factors:
            try:
                value = self.column_label_form.assignments[counter].data
            except IndexError:
                raise Exception("No assignment " + str(counter))
            schema.set_factor(str(column), str(factor), str(value))
            counter += 1

    return schema
def test_yaml(self):
    """Saving, loading and saving ``self.schema`` again yields identical text."""
    self.maxDiff = None

    # Save the schema, load it, and save it again. Compare the two
    # versions to make sure they're the same, so that we know we
    # can round-trip.
    out = StringIO()
    self.schema.save(out)

    loaded = Schema.load(out.getvalue())

    out2 = StringIO()
    loaded.save(out2)

    self.assertEqual(out.getvalue(), out2.getvalue())
def setup_paired_schema(self):
    """Populate ``self.paired_schema`` with six sample columns, one per person/treated pair."""
    people = 'abc'
    treatments = 'yn'
    column_names = ['id', 'ay', 'an', 'by', 'bn', 'cy', 'cn']
    column_roles = ['feature_id'] + ['sample'] * 6

    self.paired_schema = Schema(column_names, column_roles)
    paired = self.paired_schema
    paired.add_factor('person', list(people))
    paired.add_factor('treated', list(treatments))

    # Column names are person + treated, e.g. 'a' + 'y' -> 'ay'.
    for person in people:
        for treatment in treatments:
            column = person + treatment
            paired.set_factor(column, 'person', person)
            paired.set_factor(column, 'treated', treatment)
def test_jobs(self):
    """Register two jobs for one raw file and schema and verify each lookup."""
    # Set up the raw file
    raw_file_meta = self.mdb.add_input_file(
        name="test.txt",
        description="Some comments",
        stream=StringIO("a\nb\nc\n"))

    schema = Schema()
    schema.add_factor("treated", [False, True])
    schema.set_columns(["id", "a", "b"], ["feature_id", "sample", "sample"])
    schema.set_factor("a", "treated", False)
    schema.set_factor("b", "treated", True)
    schema_meta = self.mdb.add_schema(
        "First one", "The first one", schema, raw_file_meta)

    a = self.mdb.add_job(
        name="job1", description="Some job",
        raw_file_meta=raw_file_meta, schema_meta=schema_meta)
    b = self.mdb.add_job(
        name="job2", description="Other job",
        raw_file_meta=raw_file_meta, schema_meta=schema_meta)

    # Make sure it returned the object appropriately
    self.assertEqual(a.name, "job1")
    self.assertEqual(a.description, "Some job")

    # Fetch the job again by id; the following checks use that object.
    a = self.mdb.job(a.obj_id)
    self.assertEqual(a.raw_file_id, raw_file_meta.obj_id)
    self.assertEqual(a.schema_id, schema_meta.obj_id)
    self.assertFalse(a.imported)

    # Make sure we can list all jobs
    jobs = self.mdb.all_jobs()
    self.assertEqual(len(jobs), 2)
    names = set(["job1", "job2"])
    self.assertEqual(names, set([x.name for x in jobs]))

    # Both jobs must be found via the schema and via the raw file.
    job_ids = self.mdb.jobs_for_schema(schema_meta.obj_id)
    self.assertIn(a.obj_id, job_ids)
    self.assertIn(b.obj_id, job_ids)

    job_ids = self.mdb.jobs_for_raw_file(raw_file_meta.obj_id)
    self.assertIn(a.obj_id, job_ids)
    self.assertIn(b.obj_id, job_ids)
def setup_three_cond_schema(self):
    """Populate ``self.three_cond_schema`` with every gender/dosage/replicate column.

    Columns are named gender + dosage + replicate number; the replicate
    number is part of the name only and is not stored as a factor.
    """
    genders = 'mf'
    dosages = 'lmh'
    replicates = [str(n) for n in range(4)]
    combos = list(product(genders, dosages, replicates))

    column_names = ['id']
    column_names.extend("".join(combo) for combo in combos)
    column_roles = ['feature_id'] + ['sample'] * len(combos)

    self.three_cond_schema = Schema(column_names, column_roles)
    three_cond = self.three_cond_schema
    three_cond.add_factor('gender', list(genders))
    three_cond.add_factor('dosage', list(dosages))

    for combo in combos:
        column = "".join(combo)
        three_cond.set_factor(column, 'gender', combo[0])
        three_cond.set_factor(column, 'dosage', combo[1])
def load_schema(db):
    """Return the Schema parsed from the text stored in ``db.attrs['schema']``."""
    serialized = db.attrs['schema']
    return Schema.load(StringIO(serialized))
class SettingValidationTest(unittest.TestCase):
    """Checks which pairings of schema layout and Settings a Job accepts."""

    def setUp(self):
        """Build both fixture schemas before each test."""
        self.setup_paired_schema()
        self.setup_three_cond_schema()

    def setup_paired_schema(self):
        """Create ``self.paired_schema`` with one column per (person, treated) pair."""
        people = 'abc'
        treatments = 'yn'
        schema = self.paired_schema = Schema(
            ['id', 'ay', 'an', 'by', 'bn', 'cy', 'cn'],
            ['feature_id'] + ['sample'] * 6)
        schema.add_factor('person', list(people))
        schema.add_factor('treated', list(treatments))
        for person, treatment in product(people, treatments):
            column = person + treatment
            schema.set_factor(column, 'person', person)
            schema.set_factor(column, 'treated', treatment)

    def setup_three_cond_schema(self):
        """Create ``self.three_cond_schema``: 2 genders x 3 dosages x 4 replicates."""
        genders = 'mf'
        dosages = 'lmh'
        replicates = [str(n) for n in range(4)]
        combos = list(product(genders, dosages, replicates))
        schema = self.three_cond_schema = Schema(
            ['id'] + ["".join(combo) for combo in combos],
            ['feature_id'] + ['sample'] * len(combos))
        schema.add_factor('gender', list(genders))
        schema.add_factor('dosage', list(dosages))
        for gender, dosage, replicate in combos:
            column = gender + dosage + replicate
            schema.set_factor(column, 'gender', gender)
            schema.set_factor(column, 'dosage', dosage)

    def test_ftest_layouts(self):
        """The F statistic is rejected only for the blocked paired layout."""
        # Ftest can't be used when we have groups with only 1 replicate
        with self.assertRaises(UnsupportedLayoutException):
            settings = Settings(
                stat='f',
                block_variables=['person'],
                condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=settings)

        # But it can be used if we take away blocking
        settings = Settings(stat='f', condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=settings)

        # We can use F with three conditions, without blocking...
        settings = Settings(stat='f', condition_variables=['dosage'])
        Job(schema=self.three_cond_schema, settings=settings)

        # ... and with blocking
        settings = Settings(
            stat='f',
            block_variables=['gender'],
            condition_variables=['dosage'])
        Job(schema=self.three_cond_schema, settings=settings)

    def test_one_sample_ttest_layouts(self):
        """The one-sample t statistic needs a paired layout and no mean equalization."""
        # We can use one-sample t with a paired layout, where we have
        # 1 condition factor with 2 values, 1 blocking factors with n
        # values, and exactly 1 replicate for each combination of
        # condition and block.
        settings = Settings(
            stat='t',
            equalize_means=False,
            block_variables=['person'],
            condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=settings)

        # If we take away the blocking factor it becomes invalid
        with self.assertRaisesRegexp(UnsupportedLayoutException, '.*pair.*'):
            settings = Settings(
                equalize_means=False,
                stat='t',
                condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=settings)

        # Equalizing means is rejected for this statistic.
        with self.assertRaisesRegexp(InvalidSettingsException,
                                     '.*equalize means.*'):
            settings = Settings(
                stat='t',
                equalize_means=True,
                block_variables=['person'],
                condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=settings)

    def test_means_ratio_layouts(self):
        """Means ratio needs exactly two conditions and no mean equalization."""
        # We can use means ratio as long as we have only two conditions
        settings = Settings(
            equalize_means=False,
            stat='means_ratio',
            condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=settings)

        settings = Settings(
            equalize_means=False,
            stat='means_ratio',
            block_variables=['person'],
            condition_variables=['treated'])
        Job(schema=self.paired_schema, settings=settings)

        # We can't use means ratio if there are three conditions
        with self.assertRaises(UnsupportedLayoutException):
            settings = Settings(
                equalize_means=False,
                stat='means_ratio',
                condition_variables=['dosage'])
            Job(schema=self.three_cond_schema, settings=settings)

        with self.assertRaises(UnsupportedLayoutException):
            settings = Settings(
                equalize_means=False,
                stat='means_ratio',
                block_variables=['gender'],
                condition_variables=['dosage'])
            Job(schema=self.three_cond_schema, settings=settings)

        # Equalizing means is rejected for means ratio as well.
        with self.assertRaises(InvalidSettingsException):
            settings = Settings(
                equalize_means=True,
                stat='means_ratio',
                block_variables=['person'],
                condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=settings)

    def test_unknown_statistic(self):
        """An unrecognised statistic name is rejected."""
        with self.assertRaises(UnknownStatisticException):
            settings = Settings(
                stat='BadStat', condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=settings)
def load_schema(path):
    """Load the Schema stored in the file at ``path``.

    Args:
        path: Location of the schema file, as accepted by ``open``.

    Returns:
        The result of ``Schema.load`` on the opened file.

    Raises:
        UsageException: if an IOError occurs while opening or loading.
    """
    try:
        with open(path) as f:
            return Schema.load(f)
    except IOError as e:
        # filename and strerror are None when the IOError was not built
        # from an (errno, strerror, filename) triple; concatenating None
        # would raise TypeError and hide the real failure.
        filename = e.filename if e.filename is not None else path
        reason = e.strerror if e.strerror is not None else str(e)
        raise UsageException(
            "Couldn't load schema: " + str(filename) + ": " + str(reason))
def setUp(self):
    """Build a twelve-sample schema with 'age', 'sex' and 'treated' factors.

    Samples are numbered in the order sex -> age -> treated (True first),
    and the finished schema is stored on ``self.schema``.
    """
    sample_nums = range(1, 13)
    colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
    roles = ['feature_id'] + ['sample'] * len(sample_nums)

    schema = Schema(column_names=colnames, column_roles=roles)
    schema.add_factor('age', [2, 20, 55])
    schema.add_factor('sex', ['male', 'female'])
    schema.add_factor('treated', [False, True])

    # sample_num is zero-based over the sample columns.
    self.assertEqual(schema.sample_num("sample1"), 0)
    self.assertEqual(schema.sample_num("sample7"), 6)

    counter = 0
    for sex in ['male', 'female']:
        for age in [2, 20, 55]:
            for treated in [True, False]:
                counter += 1
                name = "sample" + str(counter)
                schema.set_factor(name, 'sex', sex)
                schema.set_factor(name, 'age', age)
                schema.set_factor(name, 'treated', treated)

    self.schema = schema
def test_model_dummy_vars_1(self):
    """``dummy_vars`` over 'age' and 'treated' at level 2 matches the expected table."""
    sample_nums = range(1, 13)
    colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
    roles = ['feature_id'] + ['sample'] * len(sample_nums)

    schema = Schema(column_names=colnames, column_roles=roles)
    schema.add_factor('age', [2, 20, 55])
    schema.add_factor('sex', ['male', 'female'])
    schema.add_factor('treated', [False, True])

    self.assertEqual(schema.sample_num("sample1"), 0)
    self.assertEqual(schema.sample_num("sample7"), 6)

    # Samples are numbered in the order sex -> age -> treated (True first).
    counter = 0
    for sex in ['male', 'female']:
        for age in [2, 20, 55]:
            for treated in [True, False]:
                counter += 1
                name = "sample" + str(counter)
                schema.set_factor(name, 'sex', sex)
                schema.set_factor(name, 'age', age)
                schema.set_factor(name, 'treated', treated)

    dummies = dummy_vars(schema, ['age', 'treated'], level=2)

    expected = DummyVarTable(
        ({},
         {'age': 20},
         {'age': 55},
         {'treated': True},
         {'age': 20, 'treated': True},
         {'age': 55, 'treated': True}),
        [
            DummyVarAssignment(
                factor_values=(2, False),
                bits=(True, False, False, False, False, False),
                indexes=['sample2', 'sample8']),
            DummyVarAssignment(
                factor_values=(2, True),
                bits=(True, False, False, True, False, False),
                indexes=['sample1', 'sample7']),
            DummyVarAssignment(
                factor_values=(20, False),
                bits=(True, True, False, False, False, False),
                indexes=['sample4', 'sample10']),
            DummyVarAssignment(
                factor_values=(20, True),
                bits=(True, True, False, True, True, False),
                indexes=['sample3', 'sample9']),
            DummyVarAssignment(
                factor_values=(55, False),
                bits=(True, False, True, False, False, False),
                indexes=['sample6', 'sample12']),
            DummyVarAssignment(
                factor_values=(55, True),
                bits=(True, False, True, True, False, True),
                indexes=['sample5', 'sample11']),
        ])

    self.assertEqual(dummies, expected)
def test_ignore_columns(self):
    """Columns with role None are left out of sample lookups and round-trip through save/load."""
    names = ["gene_id"]
    roles = ['feature_id']
    # Even-numbered columns are samples; odd-numbered ones get role None.
    for i in range(8):
        names.append('sample_' + str(i))
        roles.append('sample' if (i % 2) == 0 else None)

    schema = Schema(column_names=names, column_roles=roles)
    self.assertEqual(len(schema.sample_column_names), 4)

    schema.add_factor('treated', [False, True])
    schema.set_factor('sample_0', 'treated', False)
    schema.set_factor('sample_2', 'treated', False)
    schema.set_factor('sample_4', 'treated', True)
    schema.set_factor('sample_6', 'treated', True)

    # NOTE(review): i is still 7 from the loop above, so this names the
    # nonexistent column 'sample_17' rather than the role-None column
    # 'sample_1'. Confirm which column this check was meant to target.
    with self.assertRaises(Exception):
        schema.set_factor('sample_1' + str(i), 'treated', True)

    self.assertEqual(
        schema.possible_assignments(['treated']),
        [OrderedDict([('treated', False)]),
         OrderedDict([('treated', True)])])

    # Indexes count sample columns only.
    self.assertEqual(
        schema.indexes_with_assignments(OrderedDict([('treated', False)])),
        [0, 1])
    self.assertEqual(
        schema.indexes_with_assignments(OrderedDict([('treated', True)])),
        [2, 3])

    self.assertEqual(
        schema.samples_with_assignments(OrderedDict([('treated', False)])),
        ['sample_0', 'sample_2'])
    self.assertEqual(
        schema.samples_with_assignments(OrderedDict([('treated', True)])),
        ['sample_4', 'sample_6'])

    # Save, load, and save again; both serialized forms must match.
    out = StringIO()
    schema.save(out)
    loaded = Schema.load(out.getvalue())
    out2 = StringIO()
    loaded.save(out2)

    self.maxDiff = None
    self.assertEqual(out.getvalue(), out2.getvalue())
def setUp(self):
    """Prepare ``self.schema``: twelve samples labelled by age, sex and treatment.

    Sample numbers follow the loop order sex -> age -> treated, with
    treated=True assigned before treated=False.
    """
    sample_nums = range(1, 13)
    colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
    roles = ['feature_id'] + ['sample'] * len(sample_nums)

    schema = Schema(column_names=colnames, column_roles=roles)
    schema.add_factor('age', [2, 20, 55])
    schema.add_factor('sex', ['male', 'female'])
    schema.add_factor('treated', [False, True])

    # sample_num is zero-based over the sample columns.
    self.assertEqual(schema.sample_num("sample1"), 0)
    self.assertEqual(schema.sample_num("sample7"), 6)

    counter = 0
    for sex in ['male', 'female']:
        for age in [2, 20, 55]:
            for treated in [True, False]:
                counter += 1
                name = "sample" + str(counter)
                schema.set_factor(name, 'sex', sex)
                schema.set_factor(name, 'age', age)
                schema.set_factor(name, 'treated', treated)

    self.schema = schema
def load(self):
    """Open ``self.path`` and return the Schema parsed from it."""
    with open(self.path) as schema_file:
        loaded = Schema.load(schema_file)
    return loaded
def test_ignore_columns(self):
    """Role-None columns are skipped by sample lookups and survive a save/load round trip."""
    names = ["gene_id"]
    roles = ['feature_id']
    # Even-numbered columns are samples; odd-numbered ones get role None.
    for i in range(8):
        names.append('sample_' + str(i))
        roles.append('sample' if (i % 2) == 0 else None)

    schema = Schema(column_names=names, column_roles=roles)
    self.assertEqual(len(schema.sample_column_names), 4)

    schema.add_factor('treated', [False, True])
    schema.set_factor('sample_0', 'treated', False)
    schema.set_factor('sample_2', 'treated', False)
    schema.set_factor('sample_4', 'treated', True)
    schema.set_factor('sample_6', 'treated', True)

    # NOTE(review): i is still 7 from the loop above, so the column named
    # here is the nonexistent 'sample_17', not the role-None column
    # 'sample_1'. Confirm which column this check was meant to target.
    with self.assertRaises(Exception):
        schema.set_factor('sample_1' + str(i), 'treated', True)

    untreated = OrderedDict([('treated', False)])
    treated = OrderedDict([('treated', True)])

    self.assertEqual(
        schema.possible_assignments(['treated']),
        [OrderedDict([('treated', False)]),
         OrderedDict([('treated', True)])])

    # Indexes count sample columns only.
    self.assertEqual(schema.indexes_with_assignments(untreated), [0, 1])
    self.assertEqual(schema.indexes_with_assignments(treated), [2, 3])

    self.assertEqual(
        schema.samples_with_assignments(untreated),
        ['sample_0', 'sample_2'])
    self.assertEqual(
        schema.samples_with_assignments(treated),
        ['sample_4', 'sample_6'])

    # Save, load, and save again; both serialized forms must match.
    out = StringIO()
    schema.save(out)
    loaded = Schema.load(out.getvalue())
    out2 = StringIO()
    loaded.save(out2)

    self.maxDiff = None
    self.assertEqual(out.getvalue(), out2.getvalue())
def test_model_dummy_vars_1(self):
    """Compare ``dummy_vars`` for 'age' and 'treated' at level 2 with the expected table."""
    sample_nums = range(1, 13)
    colnames = ["gene_id"] + ["sample" + str(x) for x in sample_nums]
    roles = ['feature_id'] + ['sample'] * len(sample_nums)

    schema = Schema(column_names=colnames, column_roles=roles)
    schema.add_factor('age', [2, 20, 55])
    schema.add_factor('sex', ['male', 'female'])
    schema.add_factor('treated', [False, True])

    self.assertEqual(schema.sample_num("sample1"), 0)
    self.assertEqual(schema.sample_num("sample7"), 6)

    # Samples are numbered in the order sex -> age -> treated (True first).
    counter = 0
    for sex in ['male', 'female']:
        for age in [2, 20, 55]:
            for treated in [True, False]:
                counter += 1
                name = "sample" + str(counter)
                schema.set_factor(name, 'sex', sex)
                schema.set_factor(name, 'age', age)
                schema.set_factor(name, 'treated', treated)

    dummies = dummy_vars(schema, ['age', 'treated'], level=2)

    expected_vars = (
        {},
        {'age': 20},
        {'age': 55},
        {'treated': True},
        {'age': 20, 'treated': True},
        {'age': 55, 'treated': True},
    )
    expected_rows = [
        DummyVarAssignment(
            factor_values=(2, False),
            bits=(True, False, False, False, False, False),
            indexes=['sample2', 'sample8']),
        DummyVarAssignment(
            factor_values=(2, True),
            bits=(True, False, False, True, False, False),
            indexes=['sample1', 'sample7']),
        DummyVarAssignment(
            factor_values=(20, False),
            bits=(True, True, False, False, False, False),
            indexes=['sample4', 'sample10']),
        DummyVarAssignment(
            factor_values=(20, True),
            bits=(True, True, False, True, True, False),
            indexes=['sample3', 'sample9']),
        DummyVarAssignment(
            factor_values=(55, False),
            bits=(True, False, True, False, False, False),
            indexes=['sample6', 'sample12']),
        DummyVarAssignment(
            factor_values=(55, True),
            bits=(True, False, True, True, False, True),
            indexes=['sample5', 'sample11']),
    ]
    expected = DummyVarTable(expected_vars, expected_rows)

    self.assertEqual(dummies, expected)
def test_schemas(self):
    """Store two schemas based on one input file, then list, load and look them up."""
    rawfile = self.mdb.add_input_file(
        name="test.txt",
        description="Some comments",
        stream=StringIO("a\nb\nc\n"))

    schema_a = Schema()
    schema_a.add_factor("treated", [False, True])
    schema_a.set_columns(["id", "a", "b"],
                         ["feature_id", "sample", "sample"])
    schema_a.set_factor("a", "treated", False)
    schema_a.set_factor("b", "treated", True)

    schema_b = Schema()
    schema_b.add_factor("age", ["young", "old"])
    schema_b.set_columns(["key", "foo", "bar"],
                         ["feature_id", "sample", "sample"])
    schema_b.set_factor("foo", "age", "young")
    schema_b.set_factor("bar", "age", "old")

    a = self.mdb.add_schema("First one", "The first one", schema_a, rawfile)
    b = self.mdb.add_schema("Second", "Other", schema_b, rawfile)

    self.assertEqual(a.name, "First one")
    self.assertEqual(a.description, "The first one")

    schemas = self.mdb.all_schemas()
    self.assertEqual(len(schemas), 2)
    self.assertEqual(a.based_on_input_file_id, rawfile.obj_id)

    # Loading every stored schema must recover the columns of both.
    colnames = set()
    for s in schemas:
        schema = s.load()
        colnames.update(schema.column_names)
    self.assertEqual(colnames, set(["id", "a", "b", "key", "foo", "bar"]))

    schema_ids = self.mdb.schemas_based_on_input_file(
        a.based_on_input_file_id)
    self.assertIn(a.obj_id, schema_ids)
    self.assertIn(b.obj_id, schema_ids)
class SettingValidationTest(unittest.TestCase):
    """Exercises Job construction against paired and three-condition schemas."""

    def setUp(self):
        """Create the two schemas shared by the tests."""
        self.setup_paired_schema()
        self.setup_three_cond_schema()

    def setup_paired_schema(self):
        """Populate ``self.paired_schema`` with six person/treated sample columns."""
        people = 'abc'
        treatments = 'yn'
        column_names = ['id', 'ay', 'an', 'by', 'bn', 'cy', 'cn']
        column_roles = ['feature_id'] + ['sample'] * 6

        self.paired_schema = Schema(column_names, column_roles)
        paired = self.paired_schema
        paired.add_factor('person', list(people))
        paired.add_factor('treated', list(treatments))
        for person, treatment in product(people, treatments):
            column = person + treatment
            paired.set_factor(column, 'person', person)
            paired.set_factor(column, 'treated', treatment)

    def setup_three_cond_schema(self):
        """Populate ``self.three_cond_schema`` with gender/dosage/replicate columns."""
        genders = 'mf'
        dosages = 'lmh'
        replicates = [str(n) for n in range(4)]
        combos = list(product(genders, dosages, replicates))

        column_names = ['id'] + ["".join(combo) for combo in combos]
        column_roles = ['feature_id'] + ['sample'] * len(combos)

        self.three_cond_schema = Schema(column_names, column_roles)
        three_cond = self.three_cond_schema
        three_cond.add_factor('gender', list(genders))
        three_cond.add_factor('dosage', list(dosages))
        for combo in combos:
            column = "".join(combo)
            three_cond.set_factor(column, 'gender', combo[0])
            three_cond.set_factor(column, 'dosage', combo[1])

    def test_ftest_layouts(self):
        """Only the blocked paired layout is rejected for the F statistic."""
        paired = self.paired_schema
        three_cond = self.three_cond_schema

        # Ftest can't be used when we have groups with only 1 replicate
        with self.assertRaises(UnsupportedLayoutException):
            settings = Settings(
                stat='f',
                block_variables=['person'],
                condition_variables=['treated'])
            Job(schema=paired, settings=settings)

        # But it can be used if we take away blocking
        settings = Settings(stat='f', condition_variables=['treated'])
        Job(schema=paired, settings=settings)

        # We can use F with three conditions, without blocking...
        settings = Settings(stat='f', condition_variables=['dosage'])
        Job(schema=three_cond, settings=settings)

        # ... and with blocking
        settings = Settings(
            stat='f',
            block_variables=['gender'],
            condition_variables=['dosage'])
        Job(schema=three_cond, settings=settings)

    def test_one_sample_ttest_layouts(self):
        """One-sample t is accepted only with blocking and without mean equalization."""
        paired = self.paired_schema

        # We can use one-sample t with a paired layout, where we have
        # 1 condition factor with 2 values, 1 blocking factors with n
        # values, and exactly 1 replicate for each combination of
        # condition and block.
        settings = Settings(
            stat='t',
            equalize_means=False,
            block_variables=['person'],
            condition_variables=['treated'])
        Job(schema=paired, settings=settings)

        # If we take away the blocking factor it becomes invalid
        with self.assertRaisesRegexp(UnsupportedLayoutException, '.*pair.*'):
            settings = Settings(
                equalize_means=False,
                stat='t',
                condition_variables=['treated'])
            Job(schema=paired, settings=settings)

        # Turning on mean equalization is rejected.
        with self.assertRaisesRegexp(InvalidSettingsException,
                                     '.*equalize means.*'):
            settings = Settings(
                stat='t',
                equalize_means=True,
                block_variables=['person'],
                condition_variables=['treated'])
            Job(schema=paired, settings=settings)

    def test_means_ratio_layouts(self):
        """Means ratio is accepted for two conditions and rejected otherwise."""
        paired = self.paired_schema
        three_cond = self.three_cond_schema

        # We can use means ratio as long as we have only two conditions
        settings = Settings(
            equalize_means=False,
            stat='means_ratio',
            condition_variables=['treated'])
        Job(schema=paired, settings=settings)

        settings = Settings(
            equalize_means=False,
            stat='means_ratio',
            block_variables=['person'],
            condition_variables=['treated'])
        Job(schema=paired, settings=settings)

        # We can't use means ratio if there are three conditions
        with self.assertRaises(UnsupportedLayoutException):
            settings = Settings(
                equalize_means=False,
                stat='means_ratio',
                condition_variables=['dosage'])
            Job(schema=three_cond, settings=settings)

        with self.assertRaises(UnsupportedLayoutException):
            settings = Settings(
                equalize_means=False,
                stat='means_ratio',
                block_variables=['gender'],
                condition_variables=['dosage'])
            Job(schema=three_cond, settings=settings)

        # Turning on mean equalization is rejected here too.
        with self.assertRaises(InvalidSettingsException):
            settings = Settings(
                equalize_means=True,
                stat='means_ratio',
                block_variables=['person'],
                condition_variables=['treated'])
            Job(schema=paired, settings=settings)

    def test_unknown_statistic(self):
        """A statistic name that is not recognised raises UnknownStatisticException."""
        with self.assertRaises(UnknownStatisticException):
            settings = Settings(
                stat='BadStat', condition_variables=['treated'])
            Job(schema=self.paired_schema, settings=settings)
def test_schemas(self):
    """Add two schemas for the same input file and verify listing, loading and lookup."""
    rawfile = self.mdb.add_input_file(
        name="test.txt",
        description="Some comments",
        stream=StringIO("a\nb\nc\n"))

    schema_a = Schema()
    schema_a.add_factor('treated', [False, True])
    schema_a.set_columns(['id', 'a', 'b'],
                         ['feature_id', 'sample', 'sample'])
    schema_a.set_factor('a', 'treated', False)
    schema_a.set_factor('b', 'treated', True)

    schema_b = Schema()
    schema_b.add_factor('age', ['young', 'old'])
    schema_b.set_columns(['key', 'foo', 'bar'],
                         ['feature_id', 'sample', 'sample'])
    schema_b.set_factor('foo', 'age', 'young')
    schema_b.set_factor('bar', 'age', 'old')

    a = self.mdb.add_schema("First one", "The first one", schema_a, rawfile)
    b = self.mdb.add_schema("Second", "Other", schema_b, rawfile)

    self.assertEqual(a.name, "First one")
    self.assertEqual(a.description, "The first one")

    schemas = self.mdb.all_schemas()
    self.assertEqual(len(schemas), 2)
    self.assertEqual(a.based_on_input_file_id, rawfile.obj_id)

    # Loading every stored schema must recover the columns of both.
    colnames = set()
    for s in schemas:
        schema = s.load()
        colnames.update(schema.column_names)
    self.assertEqual(colnames, set(['id', 'a', 'b', 'key', 'foo', 'bar']))

    schema_ids = self.mdb.schemas_based_on_input_file(
        a.based_on_input_file_id)
    self.assertIn(a.obj_id, schema_ids)
    self.assertIn(b.obj_id, schema_ids)