def load_contigs(cls, handle, contigswrapper):
    """
    Read FASTA records and hand their contigs over to ``contigswrapper``.

    For every record the accession is taken from the fourth "|"-separated
    field of the record name, truncated at the first "."; "_closed" is
    appended when the record description contains "complete"
    (case-insensitive).

    Args:
        handle: FASTA source passed unchanged to SeqIO.parse
        contigswrapper: a ContigsWrapper instance for storing contig
            metadata; receives the collected contigs via add_contigs() and
            gets dict["is_from"] set to 'PLASMID' when a record description
            contains "plasmid" (case-insensitive)

    Raises:
        TypeError: when check_named_individual() reports the accession as
            already present (a notice is printed first)
    """
    collected = []  # (accession name, sequence string) pairs

    for entry in SeqIO.parse(handle, 'fasta'):
        accession = entry.name.split("|")[3].split(".")[0]
        description = entry.description.lower()
        if "complete" in description:
            accession += "_closed"

        # Abort the whole load on the first accession that already exists.
        if check_named_individual(accession):
            print("%s already in Blazegraph." % accession)
            raise TypeError

        collected.append((accession, str(entry.seq)))
        if "plasmid" in description:
            contigswrapper.dict["is_from"] = 'PLASMID'

    contigswrapper.add_contigs(collected)
def load_sequence(self, seqdata):
    """
    Load a sequence unless an individual with its name already exists.

    The sequence name is printed first. If check_named_individual() reports
    the name as present, a notice is printed and TypeError is raised;
    otherwise the work is delegated to self.get_seqdata().

    Args:
        seqdata: a SequenceMetadata instance storing sequence-related data
            that would otherwise be a data clump

    Raises:
        TypeError: when the sequence name is already present
    """
    seq_name = seqdata.name
    print(seq_name)

    # Duplicate names are signalled to the caller rather than skipped.
    if check_named_individual(seq_name):
        print("%s already in Blazegraph." % seq_name)
        raise TypeError

    self.get_seqdata(seqdata)
def create_gene(cls, metadata):
    """
    Build a Gene from ``metadata`` and upload its RDF output to Blazegraph.

    When check_named_individual() reports the gene name as already present,
    a notice is printed and nothing is uploaded.

    Args:
        metadata(GeneMetadata): An instance that contains metadata
            pertaining to an individual gene; its ``name`` identifies the
            gene and ``build_kwargs()`` supplies the Gene constructor
            keyword arguments
    """
    graph = Graph()
    gene_name = metadata.name

    if check_named_individual(gene_name):
        print("%s already in Blazegraph." % gene_name)
        return

    # Populate the graph with the gene's RDF before serialising it.
    gene = Gene(graph, **metadata.build_kwargs())
    gene.rdf()

    uploader = BlazegraphUploader()
    uploader.upload_data(generate_output(graph))