def add_gen():
    text = request.form['lyrics']
    file = open("data.txt", "w")
    file.write(text)
    file.close()
    merger.merge()
    # url_for() expects an endpoint name, not a path; redirect straight to the route instead
    return redirect('/gen')
def adder_inter():
    text = request.form['lyrics']
    file = open("data.txt", "w")
    file.write(text)
    file.close()
    merger.merge()
    return "Added lyrics!"
def main():
    merger.merge()
    pushToMongo(merger.RTL_DATA)
    print(merger.RTL_DATA.dtypes)
    print("Added to DB")
    merger.save()
def main():
    """Main function which is called first"""
    if len(sys.argv) < 3:
        print("Usage: python indexer.py <path-to-wiki-dump> <inverted-index-out-file>")
        return 1
    handler = WikiHandler()
    parser = make_parser()
    parser.setContentHandler(handler)
    dump = open(sys.argv[1], "r")
    parser.parse(dump)
    merge(handler.temp_files_length, sys.argv[2], handler.docs_length)
def main():
    args = parse_args()
    cvj_infer = CVJ(args.inferred_json, args.image_dir)
    cvj_origin = CVJ(args.original_json, args.image_dir)
    cvj_obj = merger.merge(cvj_infer, cvj_origin)
    if args.refine:
        cvj_obj = refine(
            cvj_obj, get_biggest_area_bbox
        )  # to be replaced with something more generic when integrated with CVJSON

    # Creating the unique name
    dir_name = os.path.dirname(args.inferred_json)
    date, time = str(datetime.datetime.now()).split(" ")
    time = time.split('.')[0].replace(':', '_')
    name = "refined_" + date + '_' + time + ".json"
    path = os.path.join(dir_name, name)

    # saving the merged and/or refined file
    cvj_obj.save_internal_json(save_name=path)

    if args.vis:
        Visualizer(cvj_obj).visualize_bboxes()
def test_two_strings_can_be_merged_white_separation(self):
    first_string = 'hello world. how are you?'
    second_string = 'hi there. i am fine'
    merged = merge(first_string, second_string, 40)
    self.assertEqual(
        merged, 'hello world. how are you? hi there. i am fine')
def test_works_even_if_there_are_more_lines_in_second_sentence(self):
    first_string = 'hello world, long time no see!\nhow are you?'
    second_string = 'hi there.\ni am fine.\nthanks for asking.'
    merged = merge(first_string, second_string, 40)
    self.assertEqual(merged, 'hello world, long time no see! hi there.\n' + \
                             'how are you? i am fine.\n' + \
                             ' thanks for asking.')
def cycle_tours(xy):
    o1 = TwoOpt(xy)
    order = generate_tour(xy, random.randrange(len(xy)))
    o1.tour.reset(order)
    o1.optimize()
    histogram = {}
    o2 = TwoOpt(xy)
    for i in range(1, len(xy)):
        order = generate_tour(xy, random.randrange(len(xy)))
        o2.tour.reset(order)
        o2.optimize()
        length1 = o1.tour.tour_length()
        length2 = o2.tour.tour_length()
        if not merger.merge(o1, o2.tour.node_ids) and length2 < length1:
            o1.tour.reset(o2.tour.node_ids)
            length1 = length2
        if length2 not in histogram:
            histogram[length2] = 0
        histogram[length2] += 1
        print(str(i) + " tour length: " + str(length2))
        print("best tour length after merge: " + str(length1))
    histogram_list = []
    for length in histogram:
        histogram_list.append((length, histogram[length]))
    histogram_list.sort()
    for tup in histogram_list:
        print(str(tup[0]) + ": " + str(tup[1]))
def pDC3(T, append_len, threadNum, runType, deep):
    # recursive time
    deep += 1
    # [2,1,4,4,1,4,4,1,3,3,1,0,0,0],in case [1,0,0] is missed
    for i in xrange(append_len):
        T.append(0)
    # algo<line 1> S:[([1,4,4],1),...,([1,0,0],10),([0,0,0],11),([0],13)]
    S = getter.getS(T, threadNum, runType)
    # algo<line 2> sort S by item(0), S:[([0],13),([0,0,0],11),...,([4,4,1],5)]
    S = sorted(S, key=itemgetter(0))
    # algo<line 3> name(S)
    '''
    P=[(name,pos)], P:[(1,13),...,(4,7),(5,1),(5,4)...,(7,2),(7,5)]
    max_name=7,       names:  1 ,...,  4 ,  5 ,  5  ...,  7 ,  7
    '''
    P, max_name = getter.getP(S)
    # algo<line 4> if names in P are not unique
    if max_name < len(P):
        # algo<line 5> sort P (1,4,7,...,2,5,8),P:[(5,1),(5,4),...,(6,8),(2,11)]
        P = sorted(P, key=lambda p: (p[1] % 3, p[1] / 3))
        # algo<line 6> recursively compute pDC3([5,5,4,3,1,7,7,6,2])
        SA12 = pDC3([pair[0] for pair in P], append_len, threadNum, runType, deep)
        # algo<line 7>
        '''
        P[4]=(1,13),P[8]=(2,11),P[3]=(3,10)...
        SA12   :[  4  ,  8   ,  3   ,  2  ,  1  ,  0,7,6,5 ]
        P[SA12]:[(1,13),(2,11),(3,10),(4,7),(5,4),(5,1),...]
        newP   :[(1,13),(2,11),(3,10),(4,7),(5,4),(6,1),...]
        '''
        P = getter.getUniqueP(P, SA12, threadNum, runType)
        #P = [(j+1,P[SA12[j]][1]) for j in xrange(len(SA12))]
    # algo<line 8> permute P (1,2,4,5,...), P:[(6,1),(9,2),(5,4),...,(1,13)]
    #P=sorted(P, key=itemgetter(1))
    P = radixSort.sort(P, 1)
    # algo<line 9,10,11> get S0,S1,S2.
    '''
    S0:[(T[i],T[i+1],c1 ,c2,i),...]    i%3==0; (c1,i+1),(c2,i+2) in P
    S0:[(m   ,i     ,6  ,9 ,0),...]    i = 0 ; (6 ,0+1),(9 ,0+2) in P
    S0:[(s   ,i     ,5  ,8 ,3),...]    i = 3 ; (5 ,3+1),(8 ,3+2) in P
    S1:[(c0  ,T[i]  , c1,i),...]       i%3==1; (c0,i  ),(c1,i+1) in P
    S1:[(6   ,i     , 9 ,1),...]       i = 1 ; (6 ,1  ),(9 ,1+1) in P
    S2:[(c0  ,T[i]  ,T[i+2],c2,i),...] i%3==2; (c0,i  ),(c2,i+2) in P
    S2:[(9   ,s     ,s     ,5 ,2),...] i = 2 ; (9 ,2  ),(5 ,2+2) in P
    '''
    S0, S1, S2 = getter.getS012(T, P, threadNum, runType)
    # algo<line 12> merge S0, S1, S2 by comparison function
    '''
    s12 in (S1 or S2), s0 in S0, s1 in S1, s2 in S2
    s12 <= s12' : if c0 <= c0'
        (6,i, 9,1) < (9,s,s,5,2)  : 6 < 9
    s0 <= s0'   : if (T[i],c1) <= (T[i'],c1')
        (m,i,6,9,0) < (s,i,5,8,3) : (m ,6 ) < (s ,5 )
    s0 <= s1'   : if (T[i],c1) <= (T[i'],c1')
        (m,i,6,9,0) > (6,i, 9,1)  : (m ,6) > (i ,9 )
    s0 <= s2'   : if (T[i],T[i+1],c2) <= (T[i'],T[i'+1],c2')
        (s,i,5,8,3) < (9,s,s,5,2) : (s ,i ,8) < (s ,s ,5 )
    '''
    # SA=[11, 10, 7, 4, 1, 0, 9, 8, 6, 3, 5, 2]
    SA = merger.merge(S0, S1, S2)
    # pop appendix [11,10,7,4,...] => [10,7,4,...]
    while len(SA) > (len(T) - append_len):
        SA.pop(0)
    return SA
def write(self):
    merged_data = merge(self.rooms, self.students)
    export_data = self.exporter().export(merged_data)
    folder_path = getcwd() + '/output_data/'
    file_path = folder_path + 'output' + self.exporter.extension
    if not exists(folder_path):
        mkdir(folder_path)
    with open(file_path, 'w') as f:
        f.write(export_data)
def main(product, session, server, output, tree_index, first_page):
    book = niter.Book(product, session, server)
    click.echo("Using book {}".format(book.title))
    urls, names = tree.get_tree_url(book, tree_index)
    all_contents = downloader.download_all(urls, book)
    click.echo("Got {} PDFs to merge".format(len(all_contents)))
    merged_contents = merger.merge(all_contents, names, output, first_page)
    click.echo("Done!")
def test(name):
    path = os.path.join(TEST_PATH, name)
    initial = Profile('initial', path).refresh()
    current = Profile('current', path).refresh()
    server = Profile('server', path).refresh()
    out = Profile('out', path).refresh()
    errors = merger.merge(initial.data, current.data, server.data)
    if errors:
        print(termcolor.colored(errors, 'yellow'))
    compare_data_profile(initial, out)
def load_artifact(profile_names, filename, tag=None):
    try:
        with open(filename, 'r') as fp:
            artifact_conf = yaml.load(fp.read())
        if 'profiles' in artifact_conf:
            profiles_conf = artifact_conf['profiles']
            del artifact_conf['profiles']
            for profile in profile_names:
                if profile in profiles_conf:
                    artifact_conf = merge(artifact_conf, profiles_conf[profile])
        return Artifact(artifact_conf).set_tag(tag)
    except IOError:
        raise PhilipException("WARNING: Job file %s not found" % filename)
def _merge(self):
    errors = merger.merge(self.initial, self.data, self.server_profile.data)
    for error in errors:
        print(termcolor.colored(error, 'yellow'))
    print("Saving: %s" % self.server_profile.name)
    print("Saving: %s" % self.name)
    # Finish the parse
    for name in self.ALL_DATA:
        self.server_profile.data[name].save()
        self.data[name].save()
        self.initial[name].save()
        self.data[name].on_parsed()
        self.initial[name].on_parsed()
def main():
    jsonFile1 = sys.argv[1]
    jsonFile2 = sys.argv[2]
    if len(sys.argv) == 4:
        outputFileName = sys.argv[3]
    f1 = open(jsonFile1)
    f2 = open(jsonFile2)
    userJson = f1.read()
    serverJson = f2.read()
    f1.close()
    f2.close()
    timeBegin = time.time()
    userJson = merger.merge(userJson, serverJson)
    timeConsumed = time.time() - timeBegin
    print("merge 1 json file" + sys.argv[2] + " and timeConsumed=" + str(timeConsumed))
    if len(sys.argv) == 3:
        fout = open(jsonFile1, mode="w", encoding="utf8")
        fout.write(userJson)
    else:
        fout = open(outputFileName, mode="w", encoding="utf8")
        fout.write(userJson)
    fout.close()
def run_new():
    testdata = [ip_network(_) for _ in open('testdata.txt').readlines()]
    result = merge(testdata)
        histogram_list.append((length, histogram[length]))
    histogram_list.sort()
    for tup in histogram_list:
        print(str(tup[0]) + ": " + str(tup[1]))


if __name__ == "__main__":
    xy = reader.read_xy("input/berlin52.tsp")
    xy = reader.read_xy("input/xqf131.tsp")
    cycle_tours(xy)
    sys.exit()

    o1 = TwoOpt(xy)
    o1.tour.reset(generate_random_tour(xy))
    o1.optimize()
    print("tour1 length: " + str(o1.tour.tour_length()))
    o2 = TwoOpt(xy)
    for i in range(1000):
        o2.tour.reset(generate_random_tour(xy))
        o2.optimize()
        new_tour_length = o2.tour.tour_length()
        print("iteration " + str(i) + " best tour length: " + str(o1.tour.tour_length()))
        print("iteration " + str(i) + " new_tour length: " + str(new_tour_length))
        if new_tour_length < o2.tour.tour_length():
            o1.tour.reset(o2.tour.node_ids)
            continue
        merger.merge(o1, o2.tour.node_ids)
    print("final tour length: " + str(o1.tour.tour_length()))
def fill_col_labels(col_labels, filter):
    filters_out = []
    for col in filter:
        filters_out.append(col_labels[col - 1])
    return filters_out


for idx, query in enumerate(plots_folders):
    # PERF
    # --------------------
    custom_headers = gnuplot_headers.copy()
    merger.merge("perf", query)
    filters = fill_col_labels(perf_col_labels, FILTER_PERF_ALL)
    custom_headers.append('set title "perf_all"')
    custom_headers.append('set ylabel "measurement"')
    custom_headers.append('plot "perf_merged.dat" with errorbars')
    custom_headers.append('set xrange [0:100]')
    custom_headers.append('set yrange [0:500]')
    plotter.plot("perf", query, custom_headers, [])

    # IFPPS
    # --------------------

    # PIDSTAT
    # --------------------
# -*- coding: utf-8 -*-
import merger
import selected_features
from merger import save_to_lmdb
import os

merged_path = 'merged.csv'

'''
if not os.path.isfile(merged_path):
    print("merging ...")
    merger.merge()
    print("merged")
'''
print("merging ...")
merger.merge()
print("merged")

import numpy as np
import sys

temp_storage = '43.39 11.03 16.21 85.62 178.8 172.6 40.39 18.69 7.767 137.4 105.2 107.9 59.82 107.9 51.32 206.8 54.73 56.91 34.09 29 46.25 49.62 47.7 96.14 83.78 178.1 178.8 87.09 82.72 177.8 178.8 136.2 135.6 18.03 37.27 39.53 28.23 35.61 37.22 3.644 67.37 21.58 63.19 82.38 82.69 178.9 82.62 180 180 86.57 180 179.7 80.72 179.9 80.44 79.99 107.7 179.8 89.15 178.1 -14.97 66.98 180 50.26 71.05 109.1 179.9 84.46 179.8 180 86.76 180 179.8 89.57 179.8 180 86.59 179.4 179.8 86.43 179.9 179.8 87.1 179.8 179.7 88.29 179.7 1791 1377 2042 1881 1604 958.5 2965 3014 2036 2325 2396 2078 3940 3616 3647 4682 3080 1474 3861 6624 2780 4152 4875 5995 3818 2253 641.1 3324 4992 5953 2714 3502 1516 8552 2113 1574 4586 15890 14370 4300 2691 1406 14470 10660 2981 11250 16000 14240 0.9126 0.4787 0.7286 0.8671 0.01697 0.8622 0.7929 -0.03522 0.3384 0.7069 0.6081 0.8366 0.7314 0.6626 0.7502 0.6727 0.6694 0.9436 0.2001 0.8859 0.04322 0.5211 0.9018 0.9656 0.9665 0.2635 0.9052 0.9012 0.7843 0.7929 0.4846 0.8235 0.7375 0.7812 0.6371 0.7238 0.4928 0.4771 0.8543 0.7993 0.8932 0.6572 0.4866 0.01178 0.1309 0.718 0.7586 0.7251'.split(' ')
normalizer = []
for value in temp_storage:
    normalizer.append(float(value))
temp_storage = np.array(normalizer)

falls = []
with open(merged_path) as csv:
    content = csv.readlines()
import json
import os
import time
import merger
import codecs

# this program tests the performance of the merge() and mergeAndLog() functions
path = os.getcwd()
serverJson = []
for file in os.listdir(path):
    if file.find("serverJson") != -1:
        serverJson.append(
            codecs.open(os.path.join(path, file), 'r', "utf8").read())

# we assume that userJson is empty at the beginning
userJson = "{}"
timeBegin = time.time()
for sj in serverJson:
    userJson = merger.merge(userJson, sj)
timeConsumed = time.time() - timeBegin
print("merge " + str(len(serverJson)) + " json files,and timeConsumed=" + str(timeConsumed))

fout = codecs.open("userJson.json", "w", "utf8")
fout.write(userJson)
fout.close()
def test_separation_is_relative_to_the_line_ending_in_first_string(self):
    first_string = 'hello world, long time no see!\nhow are you?'
    second_string = 'hi there.\ni am fine.'
    merged = merge(first_string, second_string, 40)
    self.assertEqual(merged, 'hello world, long time no see! hi there.\n' + \
                             'how are you? i am fine.')
def main(argv):
    calculate_atlas = False
    generate_atlas = False
    bounds_x = None
    bounds_z = None
    render_all = False
    stitch_tiles = False
    config = {}
    config['y_shading'] = True
    mode = "terrain"
    world = ["world"]
    time_of_day = "day"
    bedrock = False
    zoom = 0
    try:
        opts, args = getopt.getopt(argv, "hx:z:am:w:c:", [
            "all", "stitch", "radius=", "mode=", "world=", "light=", "bedrock",
            "help", "cx=", "cz=", "zoom=", "heightslice=", "layer=",
            "noyshading", "atlas", "atlasgen"
        ])
    except getopt.GetoptError:
        print("foxelmap.py -x \"x1,x2\" -z \"z1,z2\"")
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print("FoxelMap Renderer")
            print(".\\foxelmap.py -x \"-1,1\" -z \"-1,1\"")
            print("")
            print("\t --world <path> - the path to the tiles to be rendered")
            print("")
            print("\t-a --all - renders all tiles")
            print("\t-x \"x1,x2\" - region x")
            print("\t-z \"z1,z2\" - region z")
            print("\t-c \"x,z\" - tile at x,z ingame coords")
            print("\t--cx \"x1,x2\" - tiles from x1 to x2 ingame coords")
            print("\t--cz \"z1,z2\" - tiles from z1 to z2 ingame coords")
            print("\t--radius <value> - expands the tile area selected")
            print("")
            print("\t--mode <terrain|height|land|light|biome|none>")
            print("\t--light <day|night|nether|end|gamma>")
            print("\t--bedrock - use bedrock edition biome colors")
            print("\t--heightslice <slice> - thickness of layers in height mode")
            print("\t--layer <layer> | choose a single voxelmap layer to render")
            print("\t--noyshading - disables height shading in terrain mode")
            print("")
            print("\t--zoom z")
            print("\t--stitch - produces a single image file with all tiles")
            print("")
            print("\t--atlas - uses the minecraft assets folder to calculate block colors")
            print("\t--atlasgen - generates an atlas and exports it to palettes/atlas/")
            print("\n")
            sys.exit()
        elif opt in ("-x"):
            render_all = False
            split = arg.split(",")
            if len(split) == 1:
                bounds_x = [int(arg), int(arg)]
            if len(split) == 2:
                bounds_x = [int(split[0]), int(split[1])]
                bounds_x.sort()
        elif opt in ("-z"):
            render_all = False
            split = arg.split(",")
            if len(split) == 1:
                bounds_z = [int(arg), int(arg)]
            if len(split) == 2:
                bounds_z = [int(split[0]), int(split[1])]
                bounds_z.sort()
        elif opt in ("--radius"):
            if bounds_x == None:
                bounds_x = [0, 0]
            if bounds_z == None:
                bounds_z = [0, 0]
            bounds_x = [bounds_x[0] - int(arg), bounds_x[1] + int(arg)]
            bounds_z = [bounds_z[0] - int(arg), bounds_z[1] + int(arg)]
        elif opt in ("--atlas"):
            calculate_atlas = True
        elif opt in ("--atlasgen"):
            generate_atlas = True
        elif opt in ("-a", "--all"):
            render_all = True
        elif opt in ("--stitch"):
            stitch_tiles = True
        elif opt in ("-m", "--mode"):
            if arg in ("terrain", "land", "biome", "light", "height", "none"):
                mode = arg
        elif opt in ("-w", "--world"):
            world = arg.split(",")
        elif opt in ("--light"):
            time_of_day = arg
        elif opt in ("-c"):
            split = arg.split(",")
            cx = math.floor(int(split[0]) / 256)
            cz = math.floor(int(split[1]) / 256)
            bounds_x = [cx, cx]
            bounds_z = [cz, cz]
        elif opt in ("--cx"):
            split = arg.split(",")
            cx1 = math.floor(int(split[0]) / 256)
            cx2 = math.floor(int(split[1]) / 256)
            bounds_x = [cx1, cx2]
            bounds_x.sort()
        elif opt in ("--cz"):
            split = arg.split(",")
            cz1 = math.floor(int(split[0]) / 256)
            cz2 = math.floor(int(split[1]) / 256)
            bounds_z = [cz1, cz2]
            bounds_z.sort()
        elif opt in ("--bedrock"):
            bedrock = True
        elif opt in ("--zoom"):
            zoom = int(arg)
        elif opt in ("--heightslice"):
            config['cut'] = int(arg)
        elif opt in ('--layer'):
            config['render_layer'] = int(arg)
        elif opt in ('--noyshading'):
            config['y_shading'] = False

    print(world)

    #if (bounds_x == None or bounds_z == None) and render_all == False:
    #    print("ERROR: Invalid Map Bounds")
    #    sys.exit(1)

    if (world == ""):
        print("ERROR: No World Provided")
        sys.exit(1)

    print(bounds_x, bounds_z)

    atlas = None
    light_atlas = None
    biome_atlas = None

    if generate_atlas:
        atlas_gen.calculate_atlas()

    if mode == "terrain":
        if calculate_atlas:
            atlas = atlas_gen.get_atlas(bedrock)
        else:
            atlas = atlas_gen.load_atlas(bedrock)
    if mode in ("terrain", "light"):
        light_atlas = atlas_gen.get_light_atlas(time_of_day)
    if mode in ("terrain", "biome"):
        biome_atlas = atlas_gen.get_biome_atlas()

    print("bounds is", bounds_x, bounds_z)

    for w in world:
        print("printing tiles for {}".format(w))
        if len(world) == 1:
            out = ""
        else:
            out = "{}/".format(w)
        try:
            #print(out)
            os.makedirs("out/" + out + "/z0/")
        except:
            pass
        if bounds_x != None and bounds_z != None:
            for x in range(bounds_x[0], bounds_x[1] + 1):
                for y in range(bounds_z[0], bounds_z[1] + 1):
                    if mode != "none":
                        numpy_render.make_tile(w, atlas, light_atlas, biome_atlas,
                                               "{},{}.zip".format(x, y), mode, out, config)
        if render_all:
            if mode != "none":
                for voxelfile in os.listdir(w):
                    if voxelfile.endswith('.zip'):
                        numpy_render.make_tile(w, atlas, light_atlas, biome_atlas,
                                               voxelfile, mode, out, config)
                #numpy_render.make_all_tiles(w,atlas,light_atlas,biome_atlas,mode,out)

    if len(world) > 1:
        # do merge code here
        merger.merge(world)

    if zoom > 0:
        stitch.zoom_stitch(zoom, render_all, bounds_x, bounds_z)
        pass

    if stitch_tiles:
        print("stitching")
        stitch.stitch(zoom, render_all, bounds_x, bounds_z)

    print("Done!")
def test_merges_lists(lists, merged):
    linked_lists = [to_linked_list(_list) for _list in lists]
    assert merge(linked_lists) == to_linked_list(merged)
liste = f.readlines()
for item in liste:
    item = item.rstrip("\n")
    item = item + ".OL"
    tickerList.append(item)  # Stripping newline in csv
print(tickerList)

stockList = []
for TICKER in tickerList:
    stockList.append(stockClass.Stock(TICKER))

#
for stock in stockList:
    stock.downloadData('2005-01-01', today)  # Download data
    stock.readData()
    stock.fixNa()  # Read downloaded data
    print(stock.data.head())  # Print first 5 rows of df

stock = stockList[1]  # Do operation on one stock
#
# stock.plotData()
stock.plotRollingMean(10, 20, 60)  # Plot stock with rolling mean for x,y and z days
# print(stock.data.head())
merger.merge(stockList, saveFile=True)  # Merge all stocks to one big df and save to csv
def test_linebreaks_are_also_separated_across_each_string(self):
    first_string = 'hello world.\nhow are you?'
    second_string = 'hi there.\ni am fine.'
    merged = merge(first_string, second_string, 40)
    self.assertEqual(merged, 'hello world. hi there.\n' + \
                             'how are you? i am fine.')
    return climber.tour.node_ids


if __name__ == "__main__":
    xy = reader.read_xy("input/berlin52.tsp")
    xy = reader.read_xy("input/xqf131.tsp")
    best_climber = TwoOpt(xy)
    best_climber.optimize()
    best_length = best_climber.tour.tour_length()
    print("local optimum: " + str(best_length))
    test_climber = TwoOpt(xy)
    for i in range(1000):
        test_climber.tour.reset(best_climber.tour.node_ids)
        test_climber.tour.double_bridge_perturbation()
        test_climber.optimize()
        print("iteration " + str(i) + " best tour length: " + str(best_length))
        test_tour_length = test_climber.tour.tour_length()
        print("iteration " + str(i) + " test tour length: " + str(test_tour_length))
        """
        if test_tour_length < best_length:
            best_climber.tour.reset(test_climber.tour.node_ids)
            best_length = best_climber.tour.tour_length()
            continue
        """
        merger.merge(best_climber, test_climber.tour.node_ids)
        best_length = best_climber.tour.tour_length()
    print("final tour length: " + str(best_length))
from merger import merge
from sender import send_mail
from receiver import receive_mail
import io, os, imapclient, pyzmail, smtplib, ssl, PyPDF2
from os.path import basename
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import COMMASPACE, formatdate
import config
from config import get_conf

conf_data = get_conf(config)
email = conf_data['email']
password = conf_data['password']
server = conf_data['server']
port = conf_data['port']
subject = 'RE:OCR'
text = 'Check your file at attachment\n\nCreated by automatic python merger\n\nPython Power by Dimkin'
basedir = 'c:\\!git\\PDFmerger\\process\\'
files = ['C:\\!git\\PDFmerger\\process\\combined.pdf']

to = [receive_mail(email, password, basedir)]
merge(basedir)
send_mail(email, to, subject, text, basedir, files, password, server, port)
def get_data(force, gene, pseudogene, reverse_complement, parser,
             fix_karolinska, genome_range, gene_ids, coordinate, patch,
             post_process, functional_exceptions, unique_regions, max_cn,
             custom_url=None):
    def sf(x):
        y = re.split(r'(\d+)', x[len(gene):])
        return int(y[1]), y[2]

    # Get Karolinska's data
    cypdata = karolinska.get_karolinska_database(gene, parser, force, custom_url)
    if fix_karolinska is not None:
        fix_karolinska(cypdata)
    #pprint (cypdata)

    # Get NCBI data for genes and reference genome
    genes, hg19 = ncbi.get_genomes(gene_ids[0],
                                   genome_range,
                                   gene_ids[1:],
                                   force=force,
                                   reverse_complement=reverse_complement)
    new_seq = genes[gene].seq.tomutable()
    for c, n in patch:
        new_seq[coordinate(c, genes[gene])] = n
    genes[gene] = genes[gene]._replace(seq=new_seq.toseq())

    # Fix Karolinska's coordinates
    result = merger.merge(cypdata, genes[gene], coordinate,
                          functional_exceptions, reverse_complement)

    ## pprint(genes['CYP21'].translation)
    ## pprint(genes['CYP21P'].translation)

    mx = collections.defaultdict(lambda: ['', []])
    for a in result:
        for m in result[a]['mutations']:
            mx[(m['pos'], m['op'])][0] = m
            mx[(m['pos'], m['op'])][1].append(a)
    for m in genes[gene].pseudo_mutations.values():
        m['functional'] = merger.is_functional(
            genes[gene], m, genes[gene].pseudo_mutations.values(), True)
        # if (m['pos'], m['op']) in mx:
        #     log.warn('[{}] {} (from {}) originates from {}',
        #              ' F'[mx[(m['pos'], m['op'])][0]['functional']],
        #              mx[(m['pos'], m['op'])][0]['old'],
        #              ','.join(set(mx[(m['pos'], m['op'])][1])),
        #              m['old']
        #     )

    # Remove mutations not present in hg19 and fix the coordinates
    for a in result:
        for m in result[a]['mutations']:
            if m['pos'] == 'pseudogene':
                continue
            if m['pos'] not in genes[gene].translation:
                log.warn('Main: Translation not found for {}: {} ({})',
                         a, m['old'], m['pos'])
                m['pos'] = None
            else:
                m['pos'] = genes[gene].translation[m['pos']]
        result[a]['mutations'] = [
            m for m in result[a]['mutations'] if not m['pos'] is None
        ]

    # Fetch missing dbSNP links
    result = dbsnp.get_dbsnp(result, genome_range, force)

    # Fix exon and intron coordinates
    for _, g in genes.iteritems():
        g.exons[:] = map(
            lambda x: (g.translation[int(x.start)], g.translation[int(x.end)]),
            g.exons)
        g.introns[:] = map(
            lambda x: (g.translation[int(x.start)], g.translation[int(x.end)]),
            g.introns)

    # Patch hg19 with reference SNPs
    hg19 = list(hg19)
    for gi, hi in genes[gene].translation.iteritems():
        if hg19[hi - genome_range[1]] != genes[gene].seq[gi]:
            hg19[hi - genome_range[1]] = genes[gene].seq[gi]
    hg19 = ''.join(hg19)

    result.update({
        gene + '*1': {
            'mutations': [],
            'phenotype': {
                'invivo': 'Normal',
                'invitro': 'Normal'
            }
        }
    })

    # Add missing regions
    post_process(genes, result)

    hoi = collections.OrderedDict()
    for pos, m in genes[gene].pseudo_translation.iteritems():
        hoi[genes[gene].translation[pos]] = NoIndent(
            (genes[pseudogene].translation[m['old_pos']],
             m['op'] if 'op' in m else ''))

    return dict(
        #map=hoi,
        seq=hg19,
        region=NoIndent(genome_range),
        name=gene,
        exons={
            '{}'.format(ei + 1): NoIndent(e)
            for ei, e in enumerate(genes[gene].exons)
        },
        special_regions={
            g: NoIndent(gg)
            for g, gg in genes[gene].special_regions.iteritems()
        },
        pseudogenes={
            g: {
                'exons': {
                    '{}'.format(ei + 1): NoIndent(e)
                    for ei, e in enumerate(genes[g].exons)
                },
                'special_regions': {
                    g: NoIndent(gg)
                    for g, gg in genes[g].special_regions.iteritems()
                }
            }
            for g in [pseudogene]
        } if pseudogene is not None else {},
        # Regions used for CNV detection of each gene
        unique_regions=NoIndent(unique_regions),
        # Unique CYP2D8 region used for CNV detection
        # Based on BLAT, that is [5e-4i-4e]
        cnv_region=NoIndent(('chr22', 42547463, 42548249)),
        alleles=OrderedDict([(a, {
            'phenotype': NoIndent(result[a]['phenotype']),
            'mutations': [
                NoIndent(
                    OrderedDict([(x, y[x]) for x in sorted(y, reverse=True)]))
                for y in result[a]['mutations']
            ]
        }) for a in sorted(result, key=sf)]),
        max_cn=max_cn)
def main(repo_path, commit_hash):
    """
    :param repo_path: The path to the repository to mine.
    :type repo_path: str
    :param commit_hash: The commit hash of the commit to untangle.
    :type commit_hash: str
    """
    repo = Repo(args.repo_path)
    git = repo.git
    commit = repo.commit(commit_hash)
    changes = commit_splitter.collect_changes(repo, commit, repo_path)
    static_call_graph = call_graph.generate_call_graph(git, commit_hash, args.repo_path)
    method_index = call_graph.generate_method_index(args.repo_path)
    change_matrix = {}
    for change_1 in changes:
        for change_2 in changes:
            change_pair = (change_1, change_2)
            if change_1 == change_2:
                score = None
            else:
                # 0 means changes are close, 1 means they are far
                file_distance = confidence_voters.calculate_file_distance(
                    *change_pair)
                package_distance = confidence_voters.calculate_package_distance(
                    commit.tree, *change_pair)
                call_graph_distance = confidence_voters.calculate_call_graph_distance(
                    static_call_graph, method_index, *change_pair)
                co_change_frequency = confidence_voters.calculate_co_change_frequency(
                    repo, *change_pair)
                voters = [
                    file_distance, package_distance, call_graph_distance,
                    co_change_frequency
                ]
                voters = [v for v in voters if v >= 0 and v <= 1]
                sum = 0
                for v in voters:
                    sum += v
                score = sum / len(voters)
            if not change_matrix.get(change_pair[0]):
                change_matrix[change_pair[0]] = {}
            change_matrix[change_pair[0]][change_pair[1]] = score
    final_matrix = merger.merge(change_matrix, 0.4)
    for change in final_matrix.keys():
        print(str(change))
def get_database(query, files):
    people = colector.parse_data(files)
    database = merger.merge(people)
    response = data_filter.to_filter(database, query)
    return response