Example #1
0
 def _remove_variant_set(self, variant_set_name):
     """Delete the named variant set and the documents that depend on it.

     The set is looked up by ``variant_set_name`` within
     ``self.reference_set``. It is detached from every call set that
     references it; a call set left with fewer than two variant sets is
     deleted together with its calls. Afterwards the variants that
     reference this set and hold exactly two variant sets, the set's
     metadata, and finally the set itself are deleted.
     """
     doomed_set = VariantSet.objects.get(name=variant_set_name,
                                         reference_set=self.reference_set)
     linked_call_sets = VariantCallSet.objects(variant_sets=doomed_set)
     for linked in linked_call_sets:
         linked.variant_sets.remove(doomed_set)
         linked.save()
         if len(linked.variant_sets) >= 2:
             # Still attached to at least two other variant sets: keep the
             # call set and its calls.
             continue
         VariantCall.objects(call_set=linked).delete()
         linked.delete()
     # Intended to drop variants that belong ONLY to this variant set.
     # NOTE(review): size 2 presumably means "this set plus a global set";
     # confirm. Keyword order is kept as-is because both filters target
     # the same field.
     Variant.objects(variant_sets=doomed_set, variant_sets__size=2).delete()
     VariantSetMetadata.objects(variant_set=doomed_set).delete()
     doomed_set.delete()
 def test_add_second_vcf_variant_set(self):
     """Load test2.vcf and check the resulting document counts."""
     # Per the original author's note, this VCF only has one Variant which
     # is not in the first VCF.
     # NOTE(review): the first VCF is presumably loaded by fixture setup
     # outside this view; confirm.
     second_vcf = VCF(
         f="tests/vcf_tests/test2.vcf",
         reference_set_id=self.reference_set.id,
         method="CORTEX")
     second_vcf.add_to_database()

     variant_set_total = VariantSet.objects().count()
     assert variant_set_total == 3
     call_set_total = VariantCallSet.objects().count()
     assert call_set_total == 2
     call_total = VariantCall.objects().count()
     assert call_total == 42
     variant_total = Variant.objects().count()
     assert variant_total == 22

     first_variant = Variant.objects()[0]
     assert len(first_variant.variant_sets) == 3
     named_variant = Variant.objects.get(names="UNION_BC_k31_var_147")
     assert len(named_variant.variant_sets) == 3
 def test_add_add_variants_and_calls(self):
     """Load test.vcf and expect 21 variant calls and 21 variants."""
     loader = VCF(
         f="tests/vcf_tests/test.vcf",
         reference_set_id=self.reference_set.id,
         method="CORTEX")
     loader.add_to_database()

     call_total = VariantCall.objects().count()
     assert call_total == 21
     variant_total = Variant.objects().count()
     assert variant_total == 21
 def test_add_new_vcf_variant_set(self):
     """Load test.vcf and check the variant sets created for it."""
     loader = VCF(
         f="tests/vcf_tests/test.vcf",
         reference_set_id=self.reference_set.id,
         method="CORTEX")
     loader.add_to_database()

     # Two sets are expected: a global variant set plus one for the
     # individual VCF.
     variant_set_total = VariantSet.objects().count()
     assert variant_set_total == 2

     first_set = VariantSet.objects()[0]
     first_variant = Variant.objects()[0]
     assert len(first_variant.variant_sets) == 2
     assert first_set.name == "test.vcf"
 def test_add_second_vcf_variant_set(self):
     """Load test3.vcf and check totals, including per-type variant counts."""
     # NOTE(review): the comment originally here said this VCF adds a single
     # Variant not in the first VCF, which does not match the 106 variants
     # and single call set asserted below; it looks copy-pasted. Confirm.
     # NOTE(review): a method with this same name appears earlier in this
     # view. If both live in one class, this definition shadows the earlier
     # one; verify against the full file.
     indel_vcf = VCF(
         f="tests/vcf_tests/test3.vcf",
         reference_set_id=self.reference_set.id,
         method="CORTEX")
     indel_vcf.add_to_database()

     variant_set_total = VariantSet.objects().count()
     assert variant_set_total == 2
     call_set_total = VariantCallSet.objects().count()
     assert call_set_total == 1
     call_total = VariantCall.objects().count()
     assert call_total == 106
     variant_total = Variant.objects().count()
     assert variant_total == 106

     # Breakdown by variant type.
     snp_total = Variant.snps().count()
     assert snp_total == 89
     indel_total = Variant.indels().count()
     assert indel_total == 17
     insertion_total = Variant.insertions().count()
     assert insertion_total == 8
     deletion_total = Variant.deletions().count()
     assert deletion_total == 8
     # ph_snps is accessed without a call, exactly as in the original.
     ph_snp_total = Variant.ph_snps.count()
     assert ph_snp_total == 1
Example #6
0
def run(parser, args):
    """Build probe panels for the requested variants and write them to stdout.

    A connection to the ``atlas-<db_name>`` database is attempted first; if
    it cannot be reached, ``DB`` is set to ``None`` and probes are built
    without it. The list of mutations then comes from one of three places:

    * ``args.vcf``: delegated entirely to ``run_make_probes_from_vcf_file``.
    * ``args.genbank``: gene-level changes, read from the tab-separated
      ``args.text_file`` (``gene, mutation, alphabet``) or from
      ``GENE_MUTATION`` strings on the command line, are converted to
      variant names.
    * otherwise: variant names are read from the tab-separated
      ``args.text_file`` (``gene_name, pos, ref, alt, alphabet``) or taken
      directly from the command line.

    For every mutation whose panel could be built, a ``>ref-...`` header,
    the reference sequence, and one ``>alt-...`` record per alternate are
    written to stdout. Mutations with no panel are logged as warnings.

    Args:
        parser: Unused in this function.
        args: Parsed command-line namespace. Attributes read here:
            ``db_name``, ``reference_filepath``, ``vcf``, ``genbank``,
            ``text_file``, ``variant``/``variants``, ``kmer`` and
            ``no_backgrounds``.
    """
    DB = connect('atlas-%s' % (args.db_name))
    if DB is not None:
        try:
            # Touch the Variant collection to probe the connection.
            Variant.objects()
        except (ServerSelectionTimeoutError, ConnectionError):
            DB = None
            logging.warning(
                "Could not connect to database. Continuing without using genetic backgrounds")
        else:
            logging.info("Connected to atlas-%s", args.db_name)

    mutations = []
    reference = os.path.basename(args.reference_filepath).split('.fa')[0]
    if args.vcf:
        run_make_probes_from_vcf_file(args)
    elif args.genbank:
        aa2dna = GeneAminoAcidChangeToDNAVariants(
            args.reference_filepath,
            args.genbank)
        if args.text_file:
            with open(args.text_file, 'r') as infile:
                reader = csv.reader(infile, delimiter="\t")
                for gene, mutation, alphabet in reader:
                    # Anything not flagged as "DNA" is treated as a
                    # protein-coding change.
                    protein_coding_var = alphabet != "DNA"
                    for var_name in aa2dna.get_variant_names(
                            gene, mutation, protein_coding_var):
                        mutations.append(
                            Mutation(reference=reference,
                                     var_name=var_name,
                                     gene=aa2dna.get_gene(gene),
                                     mut=mutation))
        else:
            # NOTE(review): this branch reads ``args.variant`` while the
            # non-genbank branch below reads ``args.variants``; confirm
            # which attribute the argument parser actually defines.
            for variant in args.variant:
                gene, mutation = variant.split("_")
                for var_name in aa2dna.get_variant_names(gene, mutation):
                    # Pass the gene object rather than the raw name: the
                    # header written below reads ``mut.gene.name``, which a
                    # plain string does not provide. This matches the
                    # text-file branch above.
                    mutations.append(
                        Mutation(reference=reference,
                                 var_name=var_name,
                                 gene=aa2dna.get_gene(gene),
                                 mut=mutation))
    else:
        if args.text_file:
            with open(args.text_file, 'r') as infile:
                reader = csv.reader(infile, delimiter="\t")
                for row in reader:
                    # Five columns are required; the last one is not used.
                    gene_name, pos, ref, alt, _alphabet = row
                    if gene_name == "ref":
                        var_name = "".join([ref, pos, alt])
                    else:
                        # Any first column other than "ref" is used as the
                        # variant name itself.
                        var_name = gene_name
                    mutations.append(
                        Mutation(reference=reference, var_name=var_name))
        else:
            mutations.extend(Mutation(reference=reference, var_name=v)
                             for v in args.variants)

    al = AlleleGenerator(
        reference_filepath=args.reference_filepath,
        kmer=args.kmer)
    for mut in mutations:
        variant_panel = make_variant_probe(
            al, mut.variant, args.kmer, DB=DB, no_backgrounds=args.no_backgrounds)
        if variant_panel is None:
            logging.warning(
                "All variants failed for %s_%s - %s",
                mut.gene, mut.mut, mut.variant)
            continue

        num_alts = len(variant_panel.alts)
        if mut.gene:
            sys.stdout.write(
                ">ref-%s?num_alts=%i&gene=%s&mut=%s&ref=%s\n" %
                (mut.variant.var_name, num_alts, mut.gene.name,
                 mut.mut, mut.reference))
        else:
            sys.stdout.write(
                ">ref-%s?num_alts=%i\n" %
                (mut.variant.var_name, num_alts))
        sys.stdout.write("%s\n" % variant_panel.ref)
        for a in variant_panel.alts:
            sys.stdout.write(">alt-%s\n" % mut.mut)
            sys.stdout.write("%s\n" % a)