class VolcanoResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Embedded document holding a Volcano result.

    Both fields are required maps.
    """

    # Shape: {<category_name>: [<category_value>, ...]}
    categories = mdb.MapField(StringList, required=True)

    # Shape: {<tool_name>: <ToolDocument>}
    tools = mdb.MapField(EmbeddedDoc(ToolDocument), required=True)
class ReadStatsSample(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Read-stats fields recorded in one consistent shape.

    ``num_reads`` and ``gc_content`` are scalars that are not marked
    required; ``codons`` and ``tetramers`` are required maps whose values
    are integers.
    """

    num_reads = mdb.IntField()
    gc_content = mdb.FloatField()
    codons = mdb.MapField(mdb.IntField(), required=True)
    tetramers = mdb.MapField(mdb.IntField(), required=True)
class ToolDocument(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Group together every 'plot' produced by one tool."""

    # Required two-level map; the innermost values are declared with
    # EmbeddedDoc(ToolCategoryDocument).
    tool_categories = mdb.MapField(
        mdb.MapField(EmbeddedDoc(ToolCategoryDocument)),
        required=True,
    )
class SampleSimilarityResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Sample Similarity document type.

    Holds the category values, the per-tool documents and the flat data
    records that `clean` cross-checks against the first two.
    """

    # Categories dict is of the form: {<category_name>: [<category_value>, ...]}
    categories = mdb.MapField(field=StringList, required=True)

    # Tools dict is of the form: {<tool_name>: <ToolDocument>}
    tools = mdb.MapField(field=EmbeddedDoc(ToolDocument), required=True)

    # One dict per record. `clean` requires each record to contain every
    # category name plus a '<tool_name>_x' and '<tool_name>_y' key per tool.
    data_records = mdb.ListField(mdb.DictField(), required=True)

    def clean(self):
        """Ensure that `data_records` contain valid records.

        Raises:
            ValidationError: if any record lacks one of the category names,
                or lacks the '<tool_name>_x' / '<tool_name>_y' key for any
                tool in `tools`.
        """
        category_names = list(self.categories)
        # The expected coordinate keys depend only on the tool names, so
        # build them once instead of once per record.
        coordinate_keys = [
            key
            for tool_name in self.tools
            for key in ('{}_x'.format(tool_name), '{}_y'.format(tool_name))
        ]

        for record in self.data_records:
            # Categories are checked before coordinates so a record failing
            # both reports the category problem first.
            if any(name not in record for name in category_names):
                raise ValidationError('Record must have all categories.')
            if any(key not in record for key in coordinate_keys):
                raise ValidationError('Record must have x and y for all tools.')
class HMPResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """HMP document type.

    `clean` enforces that `categories`, `sites` and `data` agree with each
    other; see its docstring for the exact checks.
    """

    # Map of category name -> list of category values.
    categories = mdb.MapField(field=StringList, required=True)
    sites = mdb.ListField(mdb.StringField(), required=True)
    # Map of category name -> list of HMPDatum; `clean` reads each datum's
    # `name` and `data` attributes.
    data = mdb.MapField(field=EmDocList(HMPDatum), required=True)

    def clean(self):
        """Ensure integrity of result content.

        Checks, in order:

        1. every category in `categories` has an entry in `data`, and every
           one of its values appears as the `name` of a datum there;
        2. every category in `data` is declared in `categories` and has as
           many datums as the category has values;
        3. every datum's `data` has the same length as `sites`.

        Raises:
            ValidationError: on the first check that fails.
        """
        for category, values in self.categories.items():
            if category not in self.data:
                msg = f'Category \'{category}\' is not present in \'data\'!'
                raise ValidationError(msg)
            values_present = [datum.name for datum in self.data[category]]
            for value in values:
                if value not in values_present:
                    msg = f'Value \'{value}\' is not present in \'data\'!'
                    raise ValidationError(msg)

        site_count = len(self.sites)
        for category_name, category_data in self.data.items():
            # A category that only exists in `data` must be reported as a
            # validation failure, not leak out as a KeyError from the lookup.
            if category_name not in self.categories:
                msg = (f'Category \'{category_name}\' is not present '
                       f'in \'categories\'!')
                raise ValidationError(msg)
            expected_size = len(self.categories[category_name])
            if len(category_data) != expected_size:
                msg = (f'Category data for {category_name} does not match size of '
                       f'category values ({expected_size})!')
                raise ValidationError(msg)
            for datum in category_data:
                if len(datum.data) != site_count:
                    msg = (f'Datum <{datum.name}> of size {len(datum.data)} '
                           f'does not match size of sites ({site_count})!')
                    raise ValidationError(msg)
class AlphaDiversityResult(mdb.EmbeddedDocument):
    """Alpha diversity results stored as an embedded document."""

    # Shape: {<category_name>: [<category_value>, ...]}
    categories = mdb.MapField(
        mdb.ListField(field=mdb.StringField()),
        required=True,
    )

    tool_names = mdb.ListField(field=mdb.StringField())

    # Required map of EmDoc(AlphaDiversityTool) values.
    # NOTE(review): presumably keyed by the entries of `tool_names` - confirm.
    by_tool = mdb.MapField(EmDoc(AlphaDiversityTool), required=True)
class AGSResult(mdb.EmbeddedDocument):
    """Embedded document for AGS results."""

    # Shape: {<category_name>: [<category_value>, ...]}
    categories = mdb.MapField(StringList, required=True)

    # Shape: {<category_name>: {<category_value>: <dist>}}
    distributions = mdb.MapField(
        mdb.MapField(EmbeddedDoc(DistributionResult)),
        required=True,
    )
class ReadStatsToolResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Tool result carrying a consistent set of read-stats fields."""

    num_reads = mongoDB.IntField()
    gc_content = mongoDB.FloatField()
    codons = mongoDB.MapField(mongoDB.IntField(), required=True)
    tetramers = mongoDB.MapField(mongoDB.IntField(), required=True)

    @staticmethod
    def stat_fields():
        """Return the names of the collected stats as a fresh list."""
        # Same four names, in declaration order, as the fields above.
        return 'num_reads gc_content codons tetramers'.split()
class TaxaTreeResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Embedded result holding three trees: metaphlan2, kraken and krakenhll."""

    metaphlan2 = mdb.MapField(mdb.DynamicField(), required=True)
    kraken = mdb.MapField(mdb.DynamicField(), required=True)
    krakenhll = mdb.MapField(mdb.DynamicField(), required=True)

    def clean(self):
        """Pass each stored tree to `validate_json_tree`, in field order."""
        # getattr keeps attribute access lazy: a later tree is only read
        # once the previous one has validated.
        for tree_field in ('metaphlan2', 'kraken', 'krakenhll'):
            validate_json_tree(getattr(self, tree_field))
class AlphaDiversityDatum(mdb.EmbeddedDocument):
    """Alpha diversity datum for one category value."""

    metrics = mdb.ListField(field=mdb.StringField())
    category_value = mdb.StringField(required=True)

    # Shape: {<metric>: [<float>, ...]}, i.e. metric -> distribution
    by_metric = mdb.MapField(mdb.ListField(field=mdb.FloatField()))
class AncestryToolResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Tool result type for Ancestry."""

    # Shape: {<location_id: string>: <percentage: float>}
    populations = mongoDB.MapField(mongoDB.FloatField(), required=True)

    def clean(self):
        """Validate every entry of `populations`.

        Raises:
            ValidationError: if a key is not in KNOWN_LOCATIONS, or a value
                is below 0 or above 1.
        """
        for location, fraction in self.populations.items():
            if location not in KNOWN_LOCATIONS:
                raise ValidationError('No known location: {}'.format(location))
            # Written as two strict comparisons (not `0 <= x <= 1` negated)
            # so a NaN value is treated exactly as before.
            if (fraction < 0) or (fraction > 1):
                raise ValidationError('Value in bad range.')
class ReadsClassifiedResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Embedded result mapping keys to SingleReadsClassifiedResult documents."""

    # NOTE(review): keys are presumably sample names - confirm against callers.
    samples = mdb.MapField(
        mdb.EmbeddedDocumentField(SingleReadsClassifiedResult),
        required=True,
    )
class MacrobeToolResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Tool result type for macrobial data."""

    # NOTE(review): as a class-level field instance this helper is presumably
    # also collected as a document field of its own - confirm that is intended.
    macrobe_row_field = mongoDB.EmbeddedDocumentField(MacrobialRow)

    # Required map whose values are MacrobialRow embedded documents.
    macrobes = mongoDB.MapField(macrobe_row_field, required=True)
class FunctionalGenesSampleDocument(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """One row of the Functional Genes table.

    Both fields are required maps with float values.
    """

    rpkm = mdb.MapField(field=mdb.FloatField(), required=True)
    rpkmg = mdb.MapField(field=mdb.FloatField(), required=True)
class Humann2Result(ToolResult):  # pylint: disable=too-few-public-methods
    """Tool result type for HUMANn2."""

    # Required map; values are declared with EmbeddedDoc(Humann2PathwaysRow).
    pathways = mongoDB.MapField(
        EmbeddedDoc(Humann2PathwaysRow),
        required=True,
    )
class KrakenResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Result type for the Kraken tool."""

    # Shape: {<taxon_name>: <abundance_value>}, abundances stored as integers
    taxa = mongoDB.MapField(field=mongoDB.IntField(), required=True)
class VFDBSampleDocument(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """VFDB values for one sample.

    Both fields are required maps with float values.
    """

    rpkm = mdb.MapField(field=mdb.FloatField(), required=True)
    rpkmg = mdb.MapField(field=mdb.FloatField(), required=True)
class MacrobeResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Collection of macrobe results."""

    # Required two-level map with float leaves.
    # NOTE(review): outer keys are presumably sample names - confirm.
    samples = mdb.MapField(
        field=mdb.MapField(field=mdb.FloatField()),
        required=True,
    )
class MethylResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Embedded document for Methyls results."""

    # Required map; values are declared with EmbeddedDoc(MethylSampleDocument).
    samples = mdb.MapField(
        EmbeddedDoc(MethylSampleDocument),
        required=True,
    )
class KrakenHLLResult(ToolResult):
    """Result type for the KrakenHLL tool."""

    # Shape: {<taxon_name>: <abundance_value>}, abundances stored as integers
    taxa = mongoDB.MapField(field=mongoDB.IntField(), required=True)
class Humann2NormalizeToolResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Tool result type for Humann2 Normalize."""

    # NOTE(review): as a class-level field instance this helper is presumably
    # also collected as a document field of its own - confirm that is intended.
    hum_row_field = mongoDB.EmbeddedDocumentField(Humann2NormalizeRow)

    # Required map whose values are Humann2NormalizeRow embedded documents.
    genes = mongoDB.MapField(hum_row_field, required=True)
class AncestryResult(mdb.EmbeddedDocument):
    """Collection of Ancestry results."""

    # Shape: {<sample_id>: <PopulationEntry>}
    samples = mdb.MapField(EmDoc(PopulationEntry), required=True)
class PathwayResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Collection of pathway results."""

    # Required map; values are declared with EmbeddedDoc(PathwaySampleDocument).
    samples = mdb.MapField(
        EmbeddedDoc(PathwaySampleDocument),
        required=True,
    )
class ShortbredResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Result type for the Shortbred tool."""

    # Shape: {<amr_gene>: <abundance_value>}, abundances stored as floats
    abundances = mongoDB.MapField(field=mongoDB.FloatField(), required=True)
class FunctionalGenesResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Functional Genes document type."""

    # NOTE(review): as a class-level field instance this helper is presumably
    # also collected as a document field of its own - confirm that is intended.
    sample_doc_field = mdb.EmbeddedDocumentField(FunctionalGenesSampleDocument)

    # Required map whose values are FunctionalGenesSampleDocument documents.
    samples = mdb.MapField(sample_doc_field, required=True)
class VFDBResult(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """VFDB document type."""

    # NOTE(review): as a class-level field instance this helper is presumably
    # also collected as a document field of its own - confirm that is intended.
    sample_doc_field = mdb.EmbeddedDocumentField(VFDBSampleDocument)

    # Required map whose values are VFDBSampleDocument embedded documents.
    samples = mdb.MapField(sample_doc_field, required=True)
class CARDAMRToolResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Tool result type for CARD AMR Alignment."""

    # NOTE(review): as a class-level field instance this helper is presumably
    # also collected as a document field of its own - confirm that is intended.
    amr_row_field = mongoDB.EmbeddedDocumentField(AMRRow)

    # Required map whose values are AMRRow embedded documents.
    genes = mongoDB.MapField(amr_row_field, required=True)
class PopulationEntry(mdb.EmbeddedDocument):
    """One population entry of an Ancestry result."""

    # Shape: {<location_id: string>: <percentage: float>}
    populations = mdb.MapField(mdb.FloatField(), required=True)
class PathwaySampleDocument(mdb.EmbeddedDocument):  # pylint: disable=too-few-public-methods
    """Pathway data for one sample.

    Both fields are required maps with float values.
    """

    pathway_abundances = mdb.MapField(field=mdb.FloatField(), required=True)
    pathway_coverages = mdb.MapField(field=mdb.FloatField(), required=True)
class VFDBToolResult(ToolResult):  # pylint: disable=too-few-public-methods
    """Tool result type for Virulence Factor data."""

    # NOTE(review): as a class-level field instance this helper is presumably
    # also collected as a document field of its own - confirm that is intended.
    vfdb_row_field = mongoDB.EmbeddedDocumentField(VFDBRow)

    # Required map whose values are VFDBRow embedded documents.
    genes = mongoDB.MapField(vfdb_row_field, required=True)