def main():
    """
    The program - The work horse.

    Attaches a file handler to the application logger, resets the graph
    through ``dgraph.drop_all`` / ``dgraph.add_schema`` and then hands every
    path yielded by ``files.walkdir`` for ``data/genomes/test/`` to
    ``dgraph.create_graph``. The client stub is closed even when one of
    those steps raises.

    :return: None
    """
    path = os.path.abspath("data/genomes/test/")
    output_directory = os.path.abspath("data/logger")
    print(path)

    # setup the application logging
    log = logging_functions.create_logger()
    log_path = os.path.join(output_directory, 'pans_labyrinth.log')
    # mode 'w' truncates the log file of a previous run
    log.addHandler(logging.FileHandler(log_path, mode='w', encoding='utf-8'))

    # Command line handling is currently disabled:
    #options = commandline.arg_parser(client)
    #dgraph.execute_args(client, options)
    #LOG.debug(options)

    log.info("Starting pans_labyrinth")
    stub = dgraph.create_client_stub()
    try:
        client = dgraph.create_client(stub)
        dgraph.drop_all(client)
        dgraph.add_schema(client)

        log.info("Starting to create graph")
        for filepath in files.walkdir(path):
            with open(filepath, 'rb') as genome_file:
                dgraph.create_graph(client, genome_file, filepath)
    finally:
        # Release the stub on failure as well as on success.
        stub.close()

    log.info("ALL DONE")
def test_non_fasta():
    """
    Function to test that the program only accepts .fasta files.

    Could create issues if the program was reading non-fasta files and
    inserting the values into the graph, so building a graph from the
    ``bad_fasta`` directory is expected to end in ``SystemExit``.
    """
    path = os.path.abspath("pans_labyrinth/tests/data/bad_fasta")
    with pytest.raises(SystemExit) as se:
        stub = dgraph.create_client_stub()
        try:
            client = dgraph.create_client(stub)
            dgraph.drop_all(client)
            dgraph.add_schema(client)
            for filepath in files.walkdir(path):
                with open(filepath, 'rb') as bad_file:
                    dgraph.create_graph(client, bad_file, filepath)
        finally:
            # Close the stub before the SystemExit reaches pytest.raises.
            stub.close()
    assert se.type is SystemExit
def test_no_file():
    """
    Function to ensure that if no file is present in the given directory
    then the program fails.

    The run over the ``empty_test`` directory is expected to end in
    ``SystemExit``.
    """
    path = os.path.abspath("pans_labyrinth/tests/data/empty_test")
    with pytest.raises(SystemExit) as se:
        stub = dgraph.create_client_stub()
        try:
            client = dgraph.create_client(stub)
            dgraph.drop_all(client)
            dgraph.add_schema(client)
            dgraph.add_genome_to_schema(client, "test_genome")
            for filepath in files.walkdir(path):
                with open(filepath, 'rb') as genome_file:
                    dgraph.create_graph(client, genome_file, filepath)
        finally:
            # Close the stub before the SystemExit reaches pytest.raises.
            stub.close()
    assert se.type is SystemExit
def test_hash_verification():
    """
    Check that the genome name derived from a file's hash is reproducible.

    For every file under ``contig_test`` the genome is registered in the
    schema, its kmers (length 11) are added to the graph, and the name is
    then recomputed from ``commandline.compute_hash`` and compared.
    """
    path = os.path.abspath("pans_labyrinth/tests/data/contig_test")
    stub = dgraph.create_client_stub()
    try:
        client = dgraph.create_client(stub)
        dgraph.drop_all(client)
        dgraph.add_schema(client)
        for filepath in files.walkdir(path):
            with open(filepath, 'rb') as genome_file:
                filename = genome_file.name
                genome = "genome_" + commandline.compute_hash(filepath)
                dgraph.add_genome_to_schema(client, genome)
                all_kmers = dgraph.get_kmers_files(filename, 11)
                dgraph.add_kmers_dgraph(client, all_kmers, genome)
            # Hashing the same file again must give the same genome name.
            assert genome == "genome_" + commandline.compute_hash(filepath)
    finally:
        # Release the stub even when an assertion or a dgraph call fails.
        stub.close()
def test_verify_contig():
    """
    A test to verify that the kmers returned from the graph are the same as
    the kmers in the original contig.

    The contig is rebuilt from the queried kmers (the whole first kmer plus
    the last character of each following kmer) and compared with the
    sequence read from ``test.fasta``.
    """
    # Create graph and fill it with kmers from a fasta file
    path = os.path.abspath("pans_labyrinth/tests/data/contig_test")
    stub = dgraph.create_client_stub()
    try:
        client = dgraph.create_client(stub)
        dgraph.drop_all(client)
        dgraph.add_schema(client)
        for filepath in files.walkdir(path):
            with open(filepath, 'rb') as genome_file:
                filename = genome_file.name
                genome = "genome_" + commandline.compute_hash(filepath)
                dgraph.add_genome_to_schema(client, genome)
                all_kmers = dgraph.get_kmers_files(filename, 11)
                dgraph.add_kmers_dgraph(client, all_kmers, genome)

        # Query graph (for the last genome added) while the stub is open
        sg1 = dgraph.path_query(client, genome)
    finally:
        # Release the stub even when a dgraph call fails.
        stub.close()

    # Put all kmers from the query result into a list
    kmer_list = [node["kmer"] for node in sg1["path"]]
    print(kmer_list)
    assert kmer_list, "path query returned no kmers"

    # Gets the whole first kmer in the list and then the last character of
    # the rest of the kmers in the list.
    # Creates a string representing the contig
    first, *rest = kmer_list
    contig = ''.join([first] + [kmer[-1] for kmer in rest])

    # Gets the actual contig from the fasta file and turns it into a string;
    # when the file holds several records the last one is used.
    sequence_string = None
    with open(os.path.join(path, "test.fasta"), "r") as f:
        for record in SeqIO.parse(f, "fasta"):
            sequence_string = str(record.seq)
    assert sequence_string is not None, "test.fasta contains no records"

    # NOTE(review): the expected value drops the final character of the
    # sequence; presumably the path query omits the last kmer - confirm.
    assert contig == sequence_string[:-1]