Example #1
0
def build_filesystem():
    """Populate the module-level directory globals used by the pipeline.

    Binds SCRATCH, BASEDIR, REFERENCES, DATA, TRIM_DIR, ASSEMBLY_DIR,
    ANNOTATION_DIR and MAP_DIR as module globals. Every value comes from
    the project's ``Dir`` helper; ``Dir.make`` / ``make_subdir`` presumably
    create the directory on disk when it is missing -- confirm against the
    ``Dir`` implementation.

    Raises:
        KeyError: if the ``USER`` environment variable is not set.
    """
    global SCRATCH, BASEDIR, REFERENCES, DATA
    global TRIM_DIR, ASSEMBLY_DIR, ANNOTATION_DIR, MAP_DIR

    # Per-user scratch area, named after the USER environment variable.
    SCRATCH = Dir.make('/scratch/' + os.environ['USER'])

    # Project root, and the reference genomes located beneath it.
    BASEDIR = Dir('/Strong/proj/.data/Project_NTM')
    reference_path = BASEDIR.join("lib", "reference_genomes")
    REFERENCES = Dir(reference_path)

    # One sub-directory of the data directory per pipeline stage.
    DATA = BASEDIR.make_subdir("data")
    stage_dir = DATA.make_subdir
    # NOTE: a "00_raw" stage directory (RAW_DIR) is currently disabled.
    TRIM_DIR = stage_dir('trimmed_reads')
    ASSEMBLY_DIR = stage_dir('assemblies')
    ANNOTATION_DIR = stage_dir('annotations')
    MAP_DIR = stage_dir('mapped_reads')
Example #2
0
"""
beagles[compbio] alma: grep , E-ALMA1.MAV_vs_NJH87.cf | more

CP018363.1	83972	C	T,G	0	34	34	SNP
CP018363.1	148672	A	G,C	0	14	14	SNP
CP018363.1	1381815	G	A,C	0	8	8	SNP
CP018363.1	2334023	G	A,T	0	30	30	SNP
CP018363.1	2948951	T	C,G	0	6	6	SNP
CP018363.1	3621547	T	C,G	0	35	35	SNP
CP018363.1	4243092	A	G,C	0	33	33	SNP
CP018363.1	4562156	T	C,G	0	15	15	SNP
"""


# --- Input and output locations ---------------------------------------------
# Directory scanned below for the '.cf' input files.
VCF_DIR = Dir("/Strong/proj/.data/alma")
# All outputs are placed under the "fasta" sub-directory.
FASTA_DIR = VCF_DIR.make_subdir("fasta")
MATRIX = FASTA_DIR.join("ALMA_matrix_N.fna")
CSV_OUT = FASTA_DIR.join("ALMA_stats.csv")
CSV_MUTATIONS = FASTA_DIR.join("ALMA_mutations.csv")

# Length of the per-isolate sequence built below; presumably the length of
# the reference genome -- TODO confirm.
POSITIONS = 5_626_623

# --- Working state ----------------------------------------------------------
files = VCF_DIR.files(endswith='.cf')
records, multi_alleles = [], {}

print(f"Building Matrix: {MATRIX} from {len(files)} files.")
for i, file in enumerate(files):
    print(f"\t{i:02d} | {file.filename}")
    isolate = file.filename.split("_vs_")[0]
    seq = ['-'] * POSITIONS