def do_LIST(self, params):
    """Answer the LIST command with one line per known newsgroup.

    Any argument is rejected with a 501 reply. Otherwise a 215 header is
    written, followed by one ``name highest lowest n`` line for every group
    returned by ``group.groups()``, and a terminating ``.`` line.

    The trailing ``n`` is a fixed literal (presumably the "no posting"
    flag of the NNTP LIST format -- confirm against the protocol spec).
    """
    if not params:
        self.writeline('215 list of newsgroups follows')
        for newsgroup in group.groups():
            # article_range() yields (lowest, highest, count); the count is
            # not part of the reply line.
            first, last, _total = newsgroup.article_range()
            entry = '%s %s %s n' % (newsgroup.name, last, first)
            self.writeline(entry)
        self.writeline('.')
    else:
        # This handler accepts no arguments.
        self.writeline('501 command syntax error')
import spgr
# *** Calculates the proportion of shared genome for each eukaryote group. ***
import group
import parseTotal

groups = group.groups()
# Totals are loaded before the output file is opened, so a failure here does
# not truncate results from an earlier run.
totaldic = parseTotal.parse_total()

# The context manager closes the output file even if a lookup or division
# below raises part-way through the groups.
with open("gr_prop_freq_output.txt", "w") as output:
    for g in groups:
        filename = "gr_" + g + ".txt"
        # gr_count is carried out on the relevant output file.
        res = spgr.gr_count(filename)
        # Total genome size is accessed for each group.
        total = int(totaldic[g])
        # Number of OGs shared with group g, as a percentage of g's total,
        # rounded to two decimal places.
        proplist = [round(int(res[name]) / total * 100, 2) for name in res]
        # Proportion dictionary is created.
        # NOTE(review): values are paired with `groups` by position, which
        # assumes `res` iterates in the same order as `groups` -- confirm
        # against spgr.gr_count.
        propdic = dict(zip(groups, proplist))
        print(propdic)
        output.write(f"{g}\t{propdic}\n")
# *** setquery.py can be used to search presence of OGs for 3+ groups.
# This can be used to link OGs to particular character traits. ***
# Now modified to enable the use of different sp. code dictionaries and group
# lists in analysis, determined by sys.argv[1] and sys.argv[2].
import group
import sys

# sys.argv[1] must equal groups, alt_groups or alt_groups_18.
# sys.argv[2] must equal codes, alt_codes, or alt_codes_18.
# function_mappings maps each command-line name to the value returned by the
# matching function in the group module (every function is called once, here,
# when the mapping is built).
function_mappings = {
    'groups': group.groups(),
    'alt_groups': group.alt_groups(),
    'alt_groups_18': group.alt_groups_18(),
    'codes': group.codes(),
    'alt_codes': group.alt_codes(),
    'alt_codes_18': group.alt_codes_18(),
}


def select_function():
    """Return the (group list, species codes) pair chosen on the command line.

    sys.argv[1] and sys.argv[2] are looked up in function_mappings and the
    two results are returned together as a tuple.

    Exits with an error message if either argument is missing or is not a key
    of function_mappings. The arguments cannot change while the script runs,
    so retrying the lookup in a loop would never terminate.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: %s <groups|alt_groups|alt_groups_18> "
                 "<codes|alt_codes|alt_codes_18>" % sys.argv[0])
    try:
        return function_mappings[sys.argv[1]], function_mappings[sys.argv[2]]
    except KeyError as err:
        sys.exit("Invalid function %s, try again. Valid names: %s"
                 % (err, ", ".join(function_mappings)))


# select_function is called with the relevant group list and codes - these
# should correspond else the program may fail.
group_list, sp_codes = select_function()
# table4.py is modified from the earlier table files. # Its purpose is to order and format the pairwise OG data so it enters R in the correct manner. # This is the alternative to ordering and formatting in R, which I cannot do at this point. # This script will serve my needs until I am more competent in R. # minusown list is ordered differently due to differences in total OGs when own groups are excluded. import re import group import glob group_names = group.groups() group_names.sort() data = [] output = open("ordered_vector_data_minusown.txt", "w") for name in group_names: to_parse = glob.glob("*.txt") for file in to_parse: filename = file.split("_") if name in filename: with open(file) as f: for line in f: og = re.search(r"\w*:\s(\w*)", line) if og: data.append(og.group(1)) # This list can be modified as needed. # At this time I have arranged the groups in order of total OGs shared. # This will result in a more sensible-looking stacked barplot. correct_order = [ "SAR", "Other", "Archaeplastida", "Haptista", "Obazoa", "Discoba", "Ancyromonadida", "Cryptista", "Amoebozoa", "Metamonads", "Malawimonadidae"
# total.py is the new, integrated file that processes total OG data (formerly carried out by parseOG.py and groupdata.py). # The related functions, parse_total() (from parseTotal.py) and parse_OG (from parseOG.py) are now incorporated into the group module. # split_other_group() has also been added to group.py to include relevant subgroups from 'Other' in a new list. import glob import group import re # To be executed in total_genome dir. # 18 groups list is currently being used. to_parse = glob.glob("*_allOGs.txt") output = open("/mnt/c/Users/scamb/Documents/uob_msc/Genome_Data/OG_arb-fal/new_outputs/summary_data/group_total.txt", "w") original_groups = group.groups() correct_order_groups_15 = ["SAR", "Haptista", "Archaeplastida", "Obazoa", "Telonemids", "Discoba", "Ancyromonadida", "Cryptista", "Atwista", "Amoebozoa", "Collodictyonids", "Metamonads", "Apusomonada", "Hemimastigophora", "Malawimonadidae"] correct_order_groups_18 = ["Alveolata", "Stramenopiles", "Archaeplastida", "Obazoa", "Telonemids", "Discoba", "Centrohelids", "Ancyromonadida", "Cryptista", "Rhizaria", "Haptyophyta", "Atwista", "Amoebozoa", "Collodictyonids", "Metamonads", "Apusomonada", "Hemimastigophora", "Malawimonadidae"] for file in to_parse: name = re.split("_", file) og = group.parse_OG(file) outputWrite = output.write(f"{name[0]}\t{og}\n") output.close() # Writes out totals and own OGs to a file as a single vector. # The data can then be processed in R. own_OGs_to_parse = glob.glob("/mnt/c/Users/scamb/Documents/uob_msc/Genome_data/OG_arb-fal/new_outputs/*.txt") totals_own_output = open("/mnt/c/Users/scamb/Documents/uob_msc/Genome_data/OG_arb-fal/new_outputs/summary_data/vector_totals_own_18.txt", "w") # correct_order_groups must be at top of loop to ensure correct order is maintained. # list of choice can be configured here. for eugroup in correct_order_groups_18:
for art in g.article_numbers(): stat = os.stat(g.article_file(art)) if now - stat.st_mtime > lifetime: to_remove.add(art) if to_remove: logger.info("Expiring in " + g.name) index = g.load_eval("index", {}) # XXX might need to generate index if it didn't exist g.saferemove("index") for (id, art) in index.items(): if art in to_remove: logger.info("Expiring article %s@%s (%s)" % (id, g.name, art)) del index[id] # XXX need to catch exceptions so we always save next art number g.save("index", repr(index)) for art in to_remove: g.delete_article(art) finally: g.lockfile.unlock() for g in group.groups(): expire(g)