Esempio n. 1
0
# Now modified to enable the use of different group lists and sp. code dictionaries in analysis, determined by sys.argv[1] and sys.argv[2] respectively.
import group
import sys

# sys.argv[1] must equal groups, alt_groups or alt_groups_18.
# sys.argv[2] must equal codes, alt_codes, or alt_codes_18.

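# function_mappings and select_function() are defined to map the sys.argv strings to the values returned by the matching group functions
# (every function is called once, when the dict is built, so the dict holds their results rather than the functions themselves).
# select_function() returns two comma-separated values, i.e. a tuple that the caller can unpack.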
# Lookup table keyed by the names accepted on the command line. Each listed
# attribute of the group module is called right here, while the dict is being
# built, so every value is that call's return value (not the function itself).
function_mappings = {
    name: getattr(group, name)()
    for name in (
        'groups',
        'alt_groups',
        'alt_groups_18',
        'codes',
        'alt_codes',
        'alt_codes_18',
    )
}


def select_function():
    """Return the two function_mappings entries chosen on the command line.

    sys.argv[1] and sys.argv[2] are used as keys into function_mappings.

    Returns:
        A 2-tuple: (function_mappings[sys.argv[1]],
        function_mappings[sys.argv[2]]).

    Raises:
        SystemExit: if fewer than two arguments were given, or if either
            argument is not a key of function_mappings.
    """
    # sys.argv cannot change while the program runs, so retrying a failed
    # lookup in a loop can never succeed - report the problem once and stop.
    try:
        first_key, second_key = sys.argv[1], sys.argv[2]
    except IndexError:
        sys.exit('Invalid function, try again. '
                 'Two arguments are required: a group list name and a '
                 'codes name.')

    try:
        return function_mappings[first_key], function_mappings[second_key]
    except KeyError as err:
        sys.exit('Invalid function, try again. Unknown name {}; '
                 'valid names are: {}.'.format(
                     err, ', '.join(function_mappings)))


# select_function is called with the relevant group list and codes - these should correspond else the program may fail.
# The first value returned is the entry selected by sys.argv[1], the second the entry selected by sys.argv[2].
group_list, sp_codes = select_function()
# Starts out empty; presumably filled later in the script (not visible here) - TODO confirm.
query = []
Esempio n. 2
0
import group
import glob
import re

# For every species code, write "<code>_total.txt" listing each *.fal file in
# the current directory that has at least one ">" header line for that code,
# followed by a final "<code> total OGs: <count>" line.
codes = group.alt_codes_18()
# One entry per item produced by iterating codes.
sp_list = list(codes)

to_parse = glob.glob("*.fal")

# Read each .fal file once and record which species codes appear in its
# header lines, instead of re-reading every file for every species.
species_by_file = {}
for fal_path in to_parse:
    with open(fal_path) as f:
        # The species code is the text between the leading ">" and the
        # first "_" of a header line.
        species_by_file[fal_path] = {
            line.split("_", 1)[0][1:] for line in f if line.startswith(">")
        }

for sp in sp_list:
    # assumes each code is hashable (e.g. a string) - TODO confirm
    filename = "%s_total.txt" % sp
    k = 0  # number of .fal files that contain this species
    # The with-block closes the report file even if a write fails.
    with open(filename, "w") as output:
        for fal_path in to_parse:
            # Each file is listed at most once per species.
            if sp in species_by_file[fal_path]:
                output.write(f"{fal_path}\n")
                k += 1
        output.write(f"{sp} total OGs: {k}")
# This script will print out the groups present in certain OGs.
# Is only a temporary script - just needed to see the constituent taxa of 'Other' in certain OGs.
# The OGs in question are from 4-way ventral groove-bearing eukaryote sets.
import group
import re

# Path handed to group.count_ogs; presumably that returns the OG names to
# inspect as strings - TODO confirm against the group module.
vg = "setquery_outputs/ventral_groove/ventral_groove_minus_one.txt"
og_list = group.count_ogs(vg)

# Append str to og_list using list comprehension.
# Note: a for loop og + .fal will not keep the changes.
og_list = [og + ".fal" for og in og_list]
to_parse = og_list

# Looked up by species code below to obtain the group for each sequence.
code_map = group.alt_codes_18()

# Lifted from find_group.
# For each .fal file, print the file name and the distinct groups found in it,
# in order of first appearance.
for falfile in to_parse:
    groups_present = []
    with open(falfile) as f:
        for line in f:
            if not line.startswith('>'):
                continue  # Only header lines carry a sp. code
            # Sp. code is the text between '>' and the first '_'
            species_code = line.split('_', 1)[0][1:]
            # A single lookup links the sp. code to its group; a code that is
            # missing from code_map raises KeyError.
            gr = code_map[species_code]
            if gr not in groups_present:  # Adding to group array if new group
                groups_present.append(gr)
    print(falfile, groups_present)