def finalizeCodeOutput(self, command, output, workingDir):
    """
    This method is called by the RAVEN code at the end of each run (if the method is present, since it is optional).
    It can be used for codes that do not create CSV files, to convert whatever output format they produce into CSV.
    This method also calls the method 'mergeOutput' if MPI mode is used, in order to merge all the output files into one.
    @ In, command, string, the command used to run the just ended job
    @ In, output, string, the output name root
    @ In, workingDir, string, current working dir
    @ Out, relapPhisicsCsv, string, the root name of the combined CSV file
    """
    relapPhisicsCsv = 'relapPhisics'
    # RELAP post processing
    self.Relap5Interface.finalizeCodeOutput(command, self.outFileName, workingDir)
    # PHISICS post processing
    self.PhisicsInterface.finalizeCodeOutput(command, output, workingDir,
                                             phiRel=True, relapOut=self.outFileName)
    jobTitle = self.PhisicsInterface.jobTitle
    combine.combine(workingDir,
                    os.path.join(workingDir, self.outFileName + '.csv'),
                    os.path.join(workingDir, jobTitle + '.csv'),
                    self.depTimeDict, self.inpTimeDict,
                    relapPhisicsCsv + '.csv')
    # remove the old CSVs
    if os.path.exists(os.path.join(workingDir, self.outFileName + '.csv')):
        os.remove(os.path.join(workingDir, self.outFileName + '.csv'))
    # was checking the outFileName CSV again while removing the jobTitle CSV
    if os.path.exists(os.path.join(workingDir, jobTitle + '.csv')):
        os.remove(os.path.join(workingDir, jobTitle + '.csv'))
    return relapPhisicsCsv
def archive(args):
    realoutput = ''
    output = ''
    inputs = []
    for arg in args[1:]:
        if len(realoutput) == 0 and (arg[0] == '-' or len(arg) == 1):
            continue
        elif len(realoutput) == 0:
            realoutput = os.path.abspath(arg)
            output = callconfig.cachefile(realoutput)
        else:
            inputs.append(arg)
    if not len(inputs):
        return
    makecachedir(output)
    print "callcatcher - detecting archiving:"
    print "\tautojoining", realoutput, "from\n\t", inputs
    combine.combine(output, inputs)
    print "callcatcher - dump currently unused:"
    print "\tUse \"callanalyse\" to manually analyse a set of compiler output files"
    print "\tautoanalysing", realoutput
    print "\tCurrently unused functions are..."
    analyse.analyse(output, "\t\t")
def start():
    wc = 0
    with open('file.txt', 'r') as f:
        for line in f:
            for word in line.split():
                i = 1
                for ltr in word:
                    wti.txt_to_img(ltr, str(i))
                    i = i + 1
                wc = wc + 1
                cm.combine(i, wc)
def link(args):
    realoutput = abslinkoutput(args)
    output = callconfig.cachefile(realoutput)
    inputs = []
    fakeargs = [args[0], ]
    uncompiled = []
    skip = False
    for arg in args[1:]:
        if skip:
            skip = False
            continue
        if arg[0] == '-' and len(arg) > 1 and arg[1] != 'o':
            if arg[1] == 'l':
                print 'linking against lib' + arg[2:] + '[.so|.a]'
            fakeargs.append(arg)
        elif arg == '-o':
            skip = True
        else:
            name, suffix = os.path.splitext(arg)
            if suffix in ('.c', '.cc', '.cp', '.cxx', '.cpp',
                          '.CPP', '.c++', '.C', '.s'):
                inputs.append(name + '.o')
                uncompiled.append(arg)
            else:
                inputs.append(arg)
    if len(uncompiled):
        print 'callcatcher - linkline contains source files, forcing', 'compile of:'
        print '\t', uncompiled
        fakeargs.append('-c')
        for uncompile in uncompiled:
            compileline = fakeargs[:]  # copy, so one compile line does not leak into the next
            compileline.append(uncompile)
            compile(compileline)
    if not len(inputs):
        return
    makecachedir(output)
    print "callcatcher - detecting link:"
    print "\tautojoining", realoutput, "from\n\t", inputs
    combine.combine(output, inputs)
    print "callcatcher - dump currently unused:"
    print "\tUse \"callanalyse\" to manually analyse a set of compiler output files"
    print "\tautoanalysing", realoutput
    print "\tCurrently unused functions are..."
    analyse.analyse(output, "\t\t")
def duplicateEquiv(event, duplDict, debug):
    """
    If the event (event tree) has arguments which have Equiv-statements,
    create a new event for each combination. Otherwise, return just the
    existing event.
    """
    argList = []  # depth-first argument list
    hasEquiv = getArgs(event, argList)
    if not hasEquiv:
        return [event]
    if debug:
        print "----------------------------------------------"
        print "Event:", event.id, event.type, event.arguments
        print "  Orig. Duplicates:", argList
    combinations = combine.combine(*argList)  # make all combinations
    if debug:
        print "  Dup. Combinations:", combinations
    newEvents = []
    count = 0  # used only for marking duplicates' ids
    for combination in combinations:
        createdEvents = makeEvent(event, combination, count, duplDict=duplDict, debug=debug)
        newEvent = createdEvents[0]
        if debug:
            for createdEvent in createdEvents:
                if createdEvent == newEvent:
                    print "  New Event (root):", createdEvent.id, createdEvent.type, createdEvent.arguments
                else:
                    print "  New Event:", createdEvent.id, createdEvent.type, createdEvent.arguments
                Validate.validate([createdEvent], simulation=True)
        newEvents.append(newEvent)
        count += 1
    return newEvents
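# Note: duplicateEquiv above, and several snippets below, rely on
# combine.combine(*lists) returning every combination that takes one element
# from each input list (a Cartesian product). A minimal sketch of that
# behavior, assuming no extra bookkeeping in the real combine module:
import itertools

def combine(*lists):
    # combine([1, 2], ['a']) -> [[1, 'a'], [2, 'a']]
    return [list(combination) for combination in itertools.product(*lists)]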
def addEvent(self, arguments, sentenceObject, umType="unknown", forceAdd=False, predictionStrength=None):
    # Collect e2 entities linked by this event
    e1Id = None
    argEntities = [[] for _ in range(len(arguments))]  # independent lists, not [[]] * n aliases
    for i in range(len(arguments)):
        arg = arguments[i]
        argE1Id = arg.get("e1")  # Take the entity trigger node from the e1 attribute of the argument
        if e1Id != None:  # trigger has already been found
            assert e1Id == argE1Id
        else:  # find the trigger
            e1Id = argE1Id
            origE1 = sentenceObject.entitiesById[argE1Id]
        e2Id = arg.get("e2")
        origE2 = sentenceObject.entitiesById[e2Id]
        e2HeadOffset = origE2.get("headOffset")
        e2Type = origE2.get("type")
        argEntities[i] = self.entitiesByHeadByType[e2HeadOffset][e2Type]
        if len(argEntities[i]) == 0:
            assert forceAdd
            argEntities[i] = [self.addEntity(origE2)]
    entityCombinations = combine.combine(*argEntities)
    for combination in entityCombinations:
        root = self.addEntity(origE1)
        root.set("umType", umType)
        if predictionStrength != None:
            root.set("umStrength", str(predictionStrength))
        for i in range(len(arguments)):
            self.addInteraction(root, combination[i], arguments[i])
def apply_peierls_to_template(template, xyz_offset=(0, 0, 0)):
    """Adds p.orbital argument to the hopping functions."""
    template = deepcopy(template)  # Needed because kwant.Builder is mutable
    x0, y0, z0 = xyz_offset
    lat = template.lattice
    a = np.max(lat.prim_vecs)  # lattice constant

    def phase(site1, site2, B_x, B_y, B_z, orbital, e, hbar):
        if orbital:
            x, y, z = site1.tag
            direction = site1.tag - site2.tag
            A = [B_y * (z - z0) - B_z * (y - y0), 0, B_x * (y - y0)]
            A = np.dot(A, direction) * a**2 * 1e-18 * e / hbar
            phase = np.exp(-1j * A)
            if lat.norbs == 2:  # No PH degrees of freedom
                return phase
            elif lat.norbs == 4:
                return np.array([phase, phase.conj(), phase, phase.conj()],
                                dtype="complex128")
        else:  # No orbital phase
            return 1

    for (site1, site2), hop in template.hopping_value_pairs():
        template[site1, site2] = combine(hop, phase, operator.mul, 2)
    return template
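# The call combine(hop, phase, operator.mul, 2) above (and
# combine(onsite, onsite_disorder, operator.add, 1) further down) suggests a
# helper that merges two value functions into one whose signature covers the
# parameters of both, sharing the first n positional (site) arguments. A
# hypothetical sketch under that assumption; the real helper in this codebase
# may differ:
import inspect
import operator

def combine(f, g, op, n_shared_args):
    f_params = list(inspect.signature(f).parameters)[n_shared_args:]
    g_params = list(inspect.signature(g).parameters)[n_shared_args:]

    def combined(*sites, **kwargs):
        # sites are the shared positional arguments; the rest are looked up by name
        f_kwargs = {k: kwargs[k] for k in f_params}
        g_kwargs = {k: kwargs[k] for k in g_params}
        return op(f(*sites[:n_shared_args], **f_kwargs),
                  g(*sites[:n_shared_args], **g_kwargs))

    return combined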
def batchcombine(nodes, datadir, datafile, timestep, nodex, nodey, nodez,
                 ncap, nprocx, nprocy, nprocz, fields, ncompositions):
    # paste
    batchpaste(datadir, datafile, fields, timestep, nodes)
    # combine
    import combine
    combined_files = combine.combine(datafile, fields, timestep,
                                     nodex, nodey, nodez, ncap,
                                     nprocx, nprocy, nprocz)
    # delete pasted files
    import glob
    filenames = glob.glob('%(datafile)s.*.%(timestep)d.pasted' % vars())
    import os
    for filename in filenames:
        os.remove(filename)
    # create .general file
    import dxgeneral
    dxgeneral.write(fields, ncompositions, combined_files)
    return
def getmodel(model, g, args):
    if model == 'deepwalk':
        return deepwalk.DeepWalk(graph=g, batch_size=args.batch_size,
                                 fac=args.epoch_fac, window=args.window_size,
                                 degree_bound=args.degree_bound,
                                 degree_power=args.degree_power)
    if model == 'app':
        return app.APP(graph=g, batch_size=args.batch_size,
                       stop_factor=args.app_jump_factor,
                       sample=args.app_sample, step=args.app_step)
    if model == 'deepwalk,app':
        return combine.combine(g, args)
    if model == 'generalwalk':
        return generalwalk.GeneralWalk(g, batch_size=args.batch_size,
                                       fac=args.epoch_fac, window=args.window_size,
                                       degree_bound=args.degree_bound,
                                       degree_power=args.degree_power)
    if model == 'dumpwalk':
        return dumpwalk.dumpwalk(g, fac=args.epoch_fac, window=args.window_size,
                                 degree_bound=args.degree_bound,
                                 degree_power=args.degree_power)
    if model == 'fixedpair':
        return fixedpair.fixedpair(g, pair_file=args.pair_file)
    if model == 'rw2vc':
        return rw2vc.rw2vc(graph=g, rw_file=args.rw_file, emb_file=args.output,
                           window=args.window_size, emb_model=args.emb_model,
                           rep_size=args.representation_size, epoch=args.epochs,
                           batch_size=args.batch_size, learning_rate=args.lr,
                           negative_ratio=args.negative_ratio)
    model_list = ['app', 'deepwalk', 'deepwalk,app', 'rw2vc',
                  'generalwalk', 'dumpwalk', 'fixedpair']
    print("The sampling method {} does not exist!".format(model))
    print("Please choose from the following:")
    for m in model_list:
        print(m)
    exit()
def buildExamples(self, sentenceGraph):
    self.makeGSEvents(sentenceGraph)
    eventNodes = []
    nameNodes = []
    for entity in sentenceGraph.entities:
        if entity.get("type") == "neg":
            continue
        if entity.get("isName") == "True":
            nameNodes.append(entity)
        else:
            eventNodes.append(entity)
    allNodes = eventNodes + nameNodes
    examples = []
    exampleIndex = 0
    undirected = sentenceGraph.dependencyGraph.to_undirected()
    paths = NX.all_pairs_shortest_path(undirected, cutoff=999)
    for eventNode in eventNodes:
        eventType = eventNode.get("type")
        if eventType in ["Gene_expression", "Transcription", "Protein_catabolism",
                         "Localization", "Phosphorylation"]:
            for nameNode in nameNodes:
                if self.isPotentialGeniaInteraction(eventNode, nameNode):
                    examples.append(self.buildExample(exampleIndex, sentenceGraph,
                                                      paths, eventNode, nameNode))
                    exampleIndex += 1
        elif eventType in ["Regulation", "Positive_regulation", "Negative_regulation"]:
            combinations = combine.combine(allNodes + [None], allNodes + [None])
            for combination in combinations:
                if combination[0] == combination[1]:
                    continue
                if combination[0] == eventNode or combination[1] == eventNode:
                    continue
                if combination[0] != None and not self.isPotentialGeniaInteraction(eventNode, combination[0]):
                    continue
                if combination[1] != None and not self.isPotentialGeniaInteraction(eventNode, combination[1]):
                    continue
                examples.append(self.buildExample(exampleIndex, sentenceGraph, paths,
                                                  eventNode, combination[0], combination[1]))
                exampleIndex += 1
        elif eventType in ["Binding"]:
            continue
        else:
            assert False, eventType
    self.gsEvents = None
    return examples
def main():
    parser = argparse.ArgumentParser(description='The Creator of Combinators')
    parser.add_argument("-d", "--datacard", type=str, required=True)
    parser.add_argument("-n", "--name", type=str, required=True)
    parser.add_argument("-a", "--asimov", action="store_true")
    parser.add_argument("-r", "--range", nargs='+', default=[0, 10])
    parser.add_argument("-v", "--verbose", type=int, default=0)
    options = parser.parse_args()
    print(" -- datacard : ", options.datacard)
    print(" -- name     : ", options.name)
    print(" -- asimov   : ", options.asimov)  # label fixed: was printed as "name"
    combine.combine(
        options.datacard,
        mass=125,
        name=options.name,
        toys=-1 if options.asimov else 0,
        points=1000,
        robust_fit=True,
        #parameter_ranges="r={},{}".format(options.range[0], options.range[1]),
        verbose=options.verbose
    )
def combine_processed_files(self):
    for name in self.matched_list:
        tname = name[1] + "-preprocessed" + "-transformed"
        ndi_file = os.path.join(self.processed, tname)
        pname = name[0] + "-preprocessed"
        gripper_file = os.path.join(self.processed, pname)
        m = c.combine(ndi_file, gripper_file)
        fname = name[0].split("-")[0]
        combined_file = os.path.join(self.results, fname)
        if m.merge_data(combined_file) == 0:
            my_logger.info("Could not create combined file = {}".format(combined_file))
            print("Could not create combined file = {}".format(combined_file))
def add_disorder_to_template(template):
    # Only works with particle-hole + spin DOF or only spin.
    template = deepcopy(template)  # Needed because kwant.Builder is mutable
    s0 = np.eye(2, dtype=complex)
    sz = np.array([[1, 0], [0, -1]], dtype=complex)
    s0sz = np.kron(s0, sz)
    norbs = template.lattice.norbs
    mat = s0sz if norbs == 4 else s0

    def onsite_disorder(site, disorder, salt):
        return disorder * (uniform(repr(site), repr(salt)) - 0.5) * mat

    for site, onsite in template.site_value_pairs():
        onsite = template[site]
        template[site] = combine(onsite, onsite_disorder, operator.add, 1)
    return template
def boundingBox(colorimg, bwimg, i):
    orig = colorimg.copy()
    bwimg = np.clip(bwimg * SENSITIVITY, 0, 255)
    bwimg = bwimg.astype(np.uint8)
    # threshold image
    ret, threshed_img = cv2.threshold(bwimg, 127, 255, cv2.THRESH_BINARY)
    # find contours
    contours, hier = cv2.findContours(threshed_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    rois = []  # x0, y0, x1, y1
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if w + h > MIN_SIZE:
            x0 = int(x - w * BUFFER)
            x1 = int(x + w * (BUFFER + 1))
            y0 = int(y - h * BUFFER)
            y1 = int(y + h * (BUFFER + 1))
            cv2.rectangle(colorimg, (x0, y0), (x1, y1), (0, 0, 255), 2)
            rois.append((x0, y0, x1, y1))
            rect = cv2.minAreaRect(c)
            box = cv2.boxPoints(rect)
            box = np.int0(box)
    combinedRois = combine(rois, orig.shape)
    #print(combinedRois)
    filteredRois = []
    for r in combinedRois:
        if (r[3] - r[1]) * (r[2] - r[0]) > 160:
            filteredRois.append(r)
    return filteredRois
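# combine(rois, orig.shape) above evidently merges overlapping candidate boxes
# into larger regions clipped to the image bounds. A hypothetical
# reconstruction, assuming (x0, y0, x1, y1) boxes; the real module may differ:
def combine(rois, shape):
    height, width = shape[:2]
    boxes = [list(r) for r in rois]
    merged = True
    while merged:  # repeatedly merge any two overlapping boxes into their union
        merged = False
        for i in range(len(boxes)):
            for j in range(i + 1, len(boxes)):
                a, b = boxes[i], boxes[j]
                if a[0] < b[2] and b[0] < a[2] and a[1] < b[3] and b[1] < a[3]:
                    boxes[i] = [min(a[0], b[0]), min(a[1], b[1]),
                                max(a[2], b[2]), max(a[3], b[3])]
                    del boxes[j]
                    merged = True
                    break
            if merged:
                break
    return [(max(0, x0), max(0, y0), min(width, x1), min(height, y1))
            for x0, y0, x1, y1 in boxes]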
def getParameterCombinations(parameters):
    parameterNames = parameters.keys()
    parameterNames.sort()
    parameterNames.reverse()  # to put trigger parameter first (allows optimized 3-parameter grid)
    parameterValues = []
    for parameterName in parameterNames:
        parameterValues.append([])
        values = parameters[parameterName]
        if isinstance(values, (list, tuple)):
            for value in values:
                parameterValues[-1].append((parameterName, value))
        else:
            parameterValues[-1].append((parameterName, values))
    combinationLists = combine.combine(*parameterValues)
    combinations = []
    for combinationList in combinationLists:
        combinations.append({})
        for value in combinationList:
            combinations[-1][value[0]] = value[1]
    return combinations
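# A worked example of the expansion above, with hypothetical parameter names:
# getParameterCombinations({"c": [1, 10], "g": 0.5}) builds
# parameterValues = [[("g", 0.5)], [("c", 1), ("c", 10)]] and returns
# [{"g": 0.5, "c": 1}, {"g": 0.5, "c": 10}]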
def finalizeEntity(self, entity):
    example = exampleByEntity[entity.get("id")]
    argTokens = []
    argTypes = []
    for argKey in ["ct", "tt"]:
        if example[3].has_key(argKey):
            if example[3][argKey].find(",") != -1:
                splits = example[3][argKey].split(",")
                argTokens.extend(splits)
                if argKey == "tt":
                    argTypes.extend(len(splits) * ["Theme"])
                else:
                    argTypes.extend(len(splits) * ["Cause"])
            else:
                argTokens.append(example[3][argKey])
                if argKey == "tt":
                    argTypes.append("Theme")
                else:
                    argTypes.append("Cause")
    # one candidate-node list per argument token, so combine.combine gets lists
    # to take the product over (the flat list built here originally could not
    # have been expanded into combinations)
    argNodes = []
    for argToken in argTokens:
        candidates = []
        for argNode in headTokenToEntity[argToken]:
            if self.entityFinalStatus[argNode.get("id")] != False:  # avoid self-interaction
                candidates.append(argNode)
        argNodes.append(candidates)
    combinations = combine.combine(*argNodes)
    # get existing event entities
    existingEntities = []
    for entity in headTokenToEntity[example[3]["et"]]:
        if entity.get("type") == self.predictionsByExample[example[0]][0]:
            existingEntities.append(entity)
    # add arguments and make new event entities as needed
    for i in range(len(combinations)):
        if i > len(existingEntities) - 1:
            existingEntities.append(self.addEntityNode())
        eventEntity = existingEntities[i]
        combination = combinations[i]
        for j in range(len(combination)):  # was "for j in len(combination)", which is not iterable
            self.addArgumentEdge(eventEntity, combination[j], argTypes[j])
        # Mark as final
        self.entityFinalStatus[eventEntity] = True
def finalizeCodeOutput(self, command, output, workingDir):
    """
    This method is called by the RAVEN code at the end of each run (if the method is present, since it is optional).
    It can be used for codes that do not create CSV files, to convert whatever output format they produce into CSV.
    This method also calls the method 'mergeOutput' if MPI mode is used, in order to merge all the output files into one.
    @ In, command, string, the command used to run the just ended job
    @ In, output, string, the output name root
    @ In, workingDir, string, current working dir
    @ Out, response, the combined PHISICS and RELAP5 data
    """
    # RELAP post processing
    dataRelap = self.Relap5Interface.finalizeCodeOutput(command, self.outFileName, workingDir)
    # PHISICS post processing
    dataPhisics = self.PhisicsInterface.finalizeCodeOutput(command, output, workingDir,
                                                           phiRel=True, relapOut=self.outFileName)
    cmb = combine.combine(workingDir, dataRelap, dataPhisics,
                          self.depTimeDict, self.inpTimeDict)
    response = cmb.returnData()
    return response
def read(self, input_file, force=False):
    """read reads in a judo file and attempts to decrypt the secret from
    available shards.
    """
    judo_file = self.read_judo_file(input_file)
    output_file = judo_file['filename']
    secret_type = judo_file['type']
    secret_id = judo_file['secretId']
    urls = judo_file['index']
    # get the shards
    shards = self.get_shards(secret_id, urls)
    # combine magic
    result, result_string = combine(shards, judo_file)
    if secret_type == 2:
        with open(output_file, 'w') as file:
            file.write(result)
    else:
        print('Decrypted secret: {}'.format(result_string))
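# The "combine magic" above presumably reassembles the secret from its shards
# (threshold secret sharing). For intuition only, an n-of-n XOR sharing scheme;
# split_secret/combine_shards are illustrative names, not the judo API:
import functools
import os

def xor_bytes(a, b):
    return bytes(x ^ y for x, y in zip(a, b))

def split_secret(secret, n):
    # n-1 random pads, plus the secret XORed with all of them
    pads = [os.urandom(len(secret)) for _ in range(n - 1)]
    return pads + [functools.reduce(xor_bytes, pads, secret)]

def combine_shards(shards):
    # XOR of all shards cancels the pads and leaves the secret
    return functools.reduce(xor_bytes, shards)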
# Correlate radio and BAT data
import numpy as np
#import sys
#sys.path.insert(1, 'home/jimmy/Dropbox/projects/lag_correlation')
#print(sys.path)
from combine import combine

radio_data = 'cygx1_radio15_1day_complete.dat'
combine('../radio15_2017_2018_1day.dat', '../cygx1_radio15_1day.ascii', radio_data)
combine('../radio15_2019_1day.dat', radio_data, radio_data)

from cc import Correlate
c = Correlate(200, 0.5, 1.0, 1.0, 'radio15_bat')
rm, rf, xm, xf = c.read(radio_data, 'cygx1_bat_1day.dat', is_bat=False)
c.correlate(rm, rf, xm, xf)
countries = ['' for i in orders]
years = ['' for i in orders]
ratings = ['' for i in orders]
print(offers)
while _thread._count() != 0:
    time.sleep(5)
lists = list(zip(links, imgs, titles, offers, orders, companies,
                 countries, years, ratings))
lists = [[url_filter(i[0])] + list(i)[1:] for i in lists
         if url_filter(i[0]) is not None]
with open(f'{DIRNAME}/temp/{FILENAME}{page}.json', 'w') as f:
    json.dump(lists, f)
x = driver.execute_script(
    'return document.querySelector(".pages-next.disabled") || document.querySelector(".next-btn.next-btn-normal.next-btn-medium.next-pagination-item.next[disabled]")?1:0'
)
print(f"current thread:{_thread._count()}/{WORKER}")
_thread.start_new_thread(periodic, (lists, page))
time.sleep(1)
if int(x) == 1:
    status = False
    PAGES = page
page += 1
driver.close()
print("Combine....")
combine(DIRECTORY, FILENAME)
print("DONE")
time.sleep(5)
def getArgumentCombinations(self, eType, interactions, entityId=None):
    combs = []
    if eType == "Binding":
        # Making examples for only all-together/all-separate cases
        # doesn't work, since even gold data has several cases of
        # overlapping bindings with different numbers of arguments
        #if len(interactions) > 0:
        #    return [interactions]
        #else:
        #    return interactions

        # Skip causes
        themes = []
        for interaction in interactions:
            if interaction.get("type") == "Theme":
                themes.append(interaction)
        for i in range(len(themes)):
            # Looking at a2-normalize.pl reveals that there can be max 6 themes
            # Based on training+devel data, four is maximum
            if i < 10: #4:
                for j in combinations(themes, i + 1):
                    combs.append(j)
        #            if len(combs) >= 100:
        #                print >> sys.stderr, "Warning, truncating unmerging examples at 100 for Binding entity", entityId
        #                break
        return combs
    elif eType == "Process":  # For ID-task
        argCombinations = []
        argCombinations.append([])  # process can have 0 interactions
        for interaction in interactions:
            if interaction.get("type") == "Participant":
                argCombinations.append([interaction])
        return argCombinations
    else:  # one of the regulation-types, or one of the simple types
        themes = []
        causes = []
        siteArgs = []
        contextGenes = []
        sideChains = []
        locTargets = []
        for interaction in interactions:
            iType = interaction.get("type")
            #assert iType in ["Theme", "Cause"], (iType, ETUtils.toStr(interaction))
            if iType not in ["Theme", "Cause", "SiteArg", "Contextgene", "Sidechain"]: # "AtLoc", "ToLoc"]:
                continue
            if iType == "Theme":
                themes.append(interaction)
            elif iType == "Cause":
                causes.append(interaction)
            elif iType == "SiteArg":
                siteArgs.append(interaction)
            elif iType == "Contextgene":
                contextGenes.append(interaction)
            elif iType == "Sidechain":
                sideChains.append(interaction)
            elif iType in ["AtLoc", "ToLoc"]:
                locTargets.append(iType)
            else:
                assert False, (iType, interaction.get("id"))
        # Limit arguments to event types that can have them
        if eType.find("egulation") == -1 and eType != "Catalysis":
            causes = []
        if eType != "Glycosylation":
            sideChains = []
        if eType not in ["Acetylation", "Methylation"]:
            contextGenes = []
        if eType == "Catalysis":
            siteArgs = []
        # Themes can always appear alone
        themeAloneCombinations = []
        for theme in themes:
            themeAloneCombinations.append([theme])
        #print "Combine", combine.combine(themes, causes), "TA", themeAloneCombinations
        return combine.combine(themes, causes) \
             + combine.combine(themes, siteArgs) \
             + combine.combine(themes, sideChains) \
             + combine.combine(themes, contextGenes) \
             + combine.combine(themes, siteArgs, sideChains) \
             + combine.combine(themes, siteArgs, contextGenes) \
             + combine.combine(themes, locTargets) \
             + themeAloneCombinations
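# In the Binding branch above, "combinations" is presumably
# itertools.combinations; e.g. two themes expand to every non-empty subset up
# to the size cap:
from itertools import combinations

themes = ["t1", "t2"]  # stand-ins for Theme interaction elements
combs = []
for i in range(len(themes)):
    if i < 10:
        for j in combinations(themes, i + 1):
            combs.append(j)
# combs == [('t1',), ('t2',), ('t1', 't2')]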
def getEvents(document, inputCorpus, task=1):
    events = {}  # event trigger entity : list of interactions pairs
    entityMap = {}
    siteMap = {}
    for sentenceElement in document.findall("sentence"):
        sentence = inputCorpus.sentencesById[sentenceElement.get("id")]
        # Put entities into a dictionary where they are accessible by their id
        for entity in sentence.entities:
            if task == 1 and entity.get("type") == "Entity":
                continue
            if entityMap.has_key(entity.get("id")):
                print >> sys.stderr, "Warning: Duplicate entity", entity.get("id"), entity.get("type")
            entityMap[entity.get("id")] = entity
            if not siteMap.has_key(entity.get("id")):
                siteMap[entity.get("id")] = []
        # Group interactions by their interaction word, i.e. the event trigger
        for interaction in sentence.interactions + sentence.pairs:
            intType = interaction.get("type")
            if intType == "neg":  # negative prediction
                continue
            if not (intType == "Theme" or intType == "Cause"):
                if task == 1:
                    continue
                elif task == 2:
                    # task 2 edges are directed (e1/site->e2/protein), so e2 is always the target
                    siteMap[interaction.get("e2")].append(interaction)
                    continue
            # All interactions are directed (e1->e2), so e1 is always the trigger
            e1 = sentence.entitiesById[interaction.get("e1")]
            assert(e1.get("isName") == "False")
            if not events.has_key(interaction.get("e1")):
                events[interaction.get("e1")] = []  # mark entity as an event trigger
            events[interaction.get("e1")].append(interaction)
            #if interaction.get("e1") == "GENIA.d10.s5.e2":
            #    print events[interaction.get("e1")]
    #print "EVENTS1", events

    # remove empty events
    removeCount = 1
    while removeCount > 0:
        removeCount = 0
        for key in sorted(events.keys()):
            if events.has_key(key):
                themeCount = 0
                causeCount = 0
                for interaction in events[key][:]:
                    type = interaction.get("type")
                    assert(type == "Theme" or type == "Cause")
                    e2 = entityMap[interaction.get("e2")]
                    if e2.get("isName") == "False":
                        if not events.has_key(e2.get("id")):
                            events[key].remove(interaction)
                            continue
                    if type == "Theme":
                        themeCount += 1
                    else:
                        causeCount += 1
                if causeCount == 0 and themeCount == 0:
                    print >> sys.stderr, "Removing: Event with no arguments", key, entityMap[key].get("type")
                    del events[key]
                    removeCount += 1
                elif causeCount > 0 and themeCount == 0:
                    if True:
                        print >> sys.stderr, "Removing: Event with Cause and no Themes", key, entityMap[key].get("type")
                        del events[key]
                        removeCount += 1
                    else:  # works worse, devel f-score 56.13 -> 55.62
                        print >> sys.stderr, "Converting Event with Cause and no Themes", key, entityMap[key].get("type")
                        for interaction in events[key][:]:
                            interaction.set("type", "Theme")
    #print "EVENTS2", events

    # Create duplicate events for events with multiple sites
    newEvents = {}  # Create new events here, old events will be completely replaced
    for key in sorted(events.keys()):  # process all events
        eventType = entityMap[key].get("type")
        interactions = events[key]
        sites = [[]] * len(interactions)  # initialize an empty list of sites for each interaction
        # Pick correct sites for each interaction from siteMap
        intCount = 0
        for interaction in interactions:
            sites[intCount] = siteMap[interaction.get("e2")][:]
            intCount += 1
        # remove invalid sites
        for i in range(len(interactions)):
            interactionType = interactions[i].get("type")
            siteList = sites[i]
            for site in siteList[:]:
                siteType = site.get("type")
                locType = siteType.find("Loc") != -1
                #if locType and (eventType == "Regulation" or eventType == "Gene_expression" or eventType == "Binding"):
                #    siteList.remove(site)
                #if siteType == "Site" and eventType == "Transcription":
                #    siteList.remove(site)
                #if siteType == "Site" and eventType == "Gene_expression":
                #    siteList.remove(site)
                #if siteType == "Site" and eventType == "Cause":
                #    siteList.remove(site)
                #if siteType == "CSite" and eventType == "Theme":
                #    siteList.remove(site)
                #if siteType == "CSite" and eventType == "Gene_expression":
                #    siteList.remove(site)
                if locType and eventType != "Localization":
                    siteList.remove(site)
                if (siteType == "Site" or siteType == "CSite") and eventType not in ["Binding", "Phosphorylation", "Regulation", "Positive_regulation", "Negative_regulation"]:
                    siteList.remove(site)
        # Replace empty site lists with "None", because combine.combine does not work well
        # with empty lists. With None, you get None in the correct places at the combinations
        for i in range(len(sites)):
            if len(sites[i]) == 0:
                sites[i] = [None]
        # Get all combinations of sites for the interactions of the events
        combinations = combine.combine(*sites)
        combCount = 0
        # Create a new event for each combination of sites. If there were no sites, there
        # is only one combination, [None]*len(interactions).
        for combination in combinations:
            # Make up a new id that couldn't have existed before
            newEventTriggerId = key + ".comb" + str(combCount)
            # Provide the new id with access to the trigger entity
            entityMap[newEventTriggerId] = entityMap[key]
            # Define the new event
            newEvents[newEventTriggerId] = []
            for i in range(len(interactions)):
                # events consist of lists of (interaction, site)-tuples
                newEvents[newEventTriggerId].append((interactions[i], combination[i]))
            combCount += 1
    events = newEvents
    return events, entityMap
def coding_challenge():
    scraper()
    tmdb()
    combine()
def ltsm_gen(net, seq_len, file_name, sampling_idx=0, n_steps=100, hidden_size=178,
             time_step=0.05, changing_note=False, note_stuck=False, remove_extra_rests=True):
    """
    Uses the trained LSTM to generate new notes and saves the output to a MIDI file.
    This approach uses a whole sequence of notes of one of the pieces we used to train
    the network, with length seq_len, which should be the same as the one used when training.
    :param net: Trained LSTM
    :param seq_len: Length of input sequence
    :param file_name: Name to be given to the generated MIDI file
    :param sampling_idx: File to get the input sequence from, out of the pieces used to train the LSTM
    :param n_steps: Number of vectors to generate
    :param hidden_size: Hidden size of the trained LSTM
    :param time_step: Vector duration. Should be the same as the one on get_right_hand()
    :param changing_note: To sample from different sources at some point of the generation
        and add this new note to the sequence. This is done in case the generation gets
        stuck repeating a particular sequence over and over.
    :param note_stuck: To change the note if the generation gets stuck playing the same
        note over and over.
    :param remove_extra_rests: If the generation outputs a lot of rests in between, use this
    :return: None. Just saves the generated music as a .mid file
    """
    notes = []  # Will contain a sequence of the predicted notes
    x = notes_encoded[sampling_idx][:seq_len]  # Uses the input sequence to start
    for nt in x:                               # predicting. This will be later removed
        notes.append(nt.cpu().numpy())         # from the final output
    h_state = torch.zeros(1, 1, hidden_size).float().cuda()
    c_state = torch.zeros(1, 1, hidden_size).float().cuda()
    print_first = True  # To print out a message if every component of a predicted vector is less than 0.9
    change_note = False
    for _ in range(n_steps):
        chosen = False  # To account for when no dimension's probability is bigger than 0.9
        y_pred, h_c_state = net(x, (h_state, c_state))  # Predicts the next notes for all
        h_state, c_state = h_c_state[0].data, h_c_state[1].data  # the notes in the input sequence
        y_pred = y_pred.data
        y_pred = y_pred[-1]  # We only care about the last predicted note (next note after last note of input sequence)
        choose = torch.zeros((1, 1, 178))  # Converts the probabilities to the actual note vector
        y_pred_left = y_pred[:, :89]
        for idx in range(89):
            if y_pred_left[:, idx] > 0.9:
                choose[:, :, idx] = 1
                chosen = True
        if y_pred_left[:, -1] >= 0.7:  # We add a hold condition, in case the probability
            choose[:, :, 88] = 1       # of having a hold is close to the one of having the pitch
        if not chosen:
            if print_first:
                print("\nPrinting out the maximum prob of all notes for a time step",
                      "when this maximum prob is less than 0.9")
                print_first = False
            pred_note_idx = np.argmax(y_pred_left.cpu())
            choose[:, :, pred_note_idx] = 1
            if pred_note_idx != 87:  # No holds for rests
                if y_pred_left[:, pred_note_idx] - y_pred_left[:, -1] <= 0.2:  # Hold condition
                    choose[:, :, 88] = 1
            print(_, "left", y_pred_left[:, np.argmax(y_pred_left.cpu())])  # Maximum probability out of all components
        y_pred_right = y_pred[:, 89:]
        for idx in range(89):
            if y_pred_right[:, idx] > 0.9:
                choose[:, :, idx + 89] = 1
                chosen = True
        if y_pred_right[:, -1] >= 0.7:
            choose[:, :, -1] = 1
        if not chosen:
            if print_first:
                print("\nPrinting out the maximum prob of all notes for a time step",
                      "when this maximum prob is less than 0.9")
                print_first = False
            pred_note_idx = np.argmax(y_pred_right.cpu())
            choose[:, :, pred_note_idx + 89] = 1
            if pred_note_idx != 87:  # No holds for rests
                if y_pred_right[:, pred_note_idx] - y_pred_right[:, -1] <= 0.2:  # Hold condition
                    choose[:, :, -1] = 1
            print(_, "right", y_pred_right[:, np.argmax(y_pred_right.cpu())])  # Maximum probability out of all components
        x_new = torch.empty(x.shape)      # Uses the output of the last time_step
        for idx, nt in enumerate(x[1:]):  # as the input for the next time_step
            x_new[idx] = nt               # So the new sequence will be the same past sequence minus the first note
        x_new[-1] = choose
        x = x_new.cuda()  # We will use this new sequence to predict the next note in the next iteration
        notes.append(choose.cpu().numpy())  # Saves the predicted note

        # Condition so that the generation does not get stuck on a particular sequence
        if changing_note:
            if _ % seq_len == 0:
                if sampling_idx >= len(notes_encoded):
                    sampling_idx = 0
                change_note = True
            st = randint(1, 100)
            if change_note:
                x_new[-1] = notes_encoded[sampling_idx][st, :, :]
                change_note = False
            else:
                x_new[-1] = notes_encoded[sampling_idx][0, :, :]
            sampling_idx += 1
            x = x_new.cuda()

        # Condition so that the generation does not get stuck on a particular note
        if _ > 6 and note_stuck:
            if (notes[-1][:, :, 89:] == notes[-2][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                if (notes[-1][:, :, 89:] == notes[-3][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                    if (notes[-1][:, :, 89:] == notes[-4][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                        if (notes[-1][:, :, 89:] == notes[-5][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                            if (notes[-1][:, :, 89:] == notes[-6][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                                for m in range(5):
                                    notes.pop(-1)
                                if sampling_idx >= len(notes_encoded):
                                    sampling_idx = 0
                                x_new[-1] = notes_encoded[sampling_idx][randint(1, 100), :, :]
                                x = x_new.cuda()
                                sampling_idx += 1

    # Gets the notes into the correct NumPy array shape
    gen_notes = np.empty((len(notes) - seq_len + 1, 178))  # Doesn't use the first predicted notes
    for idx, nt in enumerate(notes[seq_len - 1:]):         # because these were sampled from the training data
        gen_notes[idx] = nt[0]

    # Decodes the generated music
    gen_midi_left = decode(get_tempo_dim_back(gen_notes[:, :89], 74), time_step=time_step)
    # Gets rid of too many rests
    if remove_extra_rests:
        stream_left = ms.stream.Stream()
        for idx, nt in enumerate(gen_midi_left):
            if type(nt) == ms.note.Rest and idx < len(gen_midi_left) - 5:
                if nt.duration.quarterLength > 4 * time_step:
                    print("Removing rest")
                    continue
                if type(gen_midi_left[idx + 4]) == ms.note.Rest:
                    print("Removing rest")
                    continue
                stream_left.append(nt)
            else:
                stream_left.append(nt)
    else:
        stream_left = gen_midi_left

    # Same thing for right hand
    gen_midi_right = decode(get_tempo_dim_back(gen_notes[:, 89:], 74), time_step=time_step)
    if remove_extra_rests:
        stream_right = ms.stream.Stream()
        for idx, nt in enumerate(gen_midi_right):
            if type(nt) == ms.note.Rest and idx < len(gen_midi_right) - 5:
                if nt.duration.quarterLength > 4 * time_step:
                    print("Removing rest")
                    continue
                if type(gen_midi_right[idx + 4]) == ms.note.Rest:
                    print("Removing rest")
                    continue
                stream_right.append(nt)
            else:
                stream_right.append(nt)
    else:
        stream_right = gen_midi_right

    # Saves both hands combined as a MIDI file
    combine(stream_left, stream_right, file_name + ".mid")
def buildInteractions(map, sentenceElement, predictionsByExample):
    sentenceId = sentenceElement.get("id")
    interactions = []
    tokenIds = sorted(map.keys())
    for token in tokenIds:
        for exId in sorted(map[token].keys()):
            example = map[token][exId][2]
            if example == None:  # named entity
                continue
            prediction = predictionsByExample[example[0]]
            themeNodes = []
            theme2Nodes = None
            if example[3].has_key("tt"):
                if example[3]["tt"].find(",") != -1:
                    splits = example[3]["tt"].split(",")
                    assert len(splits) == 2
                    themeNodes = getEntityNodes(splits[0], map)
                    theme2Nodes = getEntityNodes(splits[1], map)
                else:
                    themeNodes = getEntityNodes(example[3]["tt"], map)
            else:
                themeNodes = [None]
            if example[3].has_key("ct"):
                causeNodes = getEntityNodes(example[3]["ct"], map)
            else:
                causeNodes = [None]
            if theme2Nodes == None:
                argCombinations = combine.combine(themeNodes, causeNodes)
                rootIndex = 0
                for combination in argCombinations:
                    if rootIndex >= len(map[token][exId][3]):
                        print >> sys.stderr, "Warning, all event duplicates not generated (possible cycle) for example", example[0]
                        break
                    rootElement = map[token][exId][3][rootIndex]
                    # add theme edge
                    if combination[0] != None:
                        pairElement = ET.Element("interaction")
                        pairElement.attrib["directed"] = "Unknown"
                        pairElement.attrib["e1"] = rootElement.get("id")
                        pairElement.attrib["e2"] = combination[0].get("id")
                        pairElement.attrib["id"] = sentenceId + ".i" + str(len(interactions))
                        pairElement.attrib["type"] = "Theme"
                        interactions.append(pairElement)
                    # add cause edge
                    if combination[1] != None:
                        pairElement = ET.Element("interaction")
                        pairElement.attrib["directed"] = "Unknown"
                        pairElement.attrib["e1"] = rootElement.get("id")
                        pairElement.attrib["e2"] = combination[1].get("id")
                        pairElement.attrib["id"] = sentenceId + ".i" + str(len(interactions))
                        pairElement.attrib["type"] = "Cause"
                        interactions.append(pairElement)
                    rootIndex += 1
            else:
                argCombinations = combine.combine(themeNodes, theme2Nodes)
                rootIndex = 0
                for combination in argCombinations:
                    if rootIndex >= len(map[token][exId][3]):
                        print >> sys.stderr, "Warning, all Binding duplicates not generated (possible cycle) for example", example[0]
                        break
                    rootElement = map[token][exId][3][rootIndex]
                    # add theme edge
                    if combination[0] != None:
                        pairElement = ET.Element("interaction")
                        pairElement.attrib["directed"] = "Unknown"
                        pairElement.attrib["e1"] = rootElement.get("id")
                        pairElement.attrib["e2"] = combination[0].get("id")
                        pairElement.attrib["id"] = sentenceId + ".i" + str(len(interactions))
                        pairElement.attrib["type"] = "Theme"
                        interactions.append(pairElement)
                    # add second theme edge
                    if combination[1] != None:
                        pairElement = ET.Element("interaction")
                        pairElement.attrib["directed"] = "Unknown"
                        pairElement.attrib["e1"] = rootElement.get("id")
                        pairElement.attrib["e2"] = combination[1].get("id")
                        pairElement.attrib["id"] = sentenceId + ".i" + str(len(interactions))
                        pairElement.attrib["type"] = "Theme"
                        interactions.append(pairElement)
                    rootIndex += 1
    return interactions
def main(args):
    programDirectory = os.path.dirname(os.path.abspath(__file__))
    # read the project files
    projects = {}
    for file in os.listdir(os.path.join(programDirectory, "projects")):
        if file.endswith(".txt") and not file.endswith("example.txt"):
            with open(os.path.join(programDirectory, "projects", file)) as ongoing_fd:
                projectID = file.split("/")[-1]
                projectID = file.replace(".txt", "")
                # the user has selected a project manually, and it is not this one:
                # then there is really nothing to do
                if not (args.project and (args.project != projectID)):
                    projects[projectID] = {}
                    for line in ongoing_fd:
                        try:
                            if line[0] != "#":
                                info = line.strip()
                                info = info.split("\t")
                                projects[projectID][info[0]] = info[1:]
                        except:
                            # the pipeline should not crash if the user adds some newlines etc to the project file
                            pass
    # Read the config file
    (working_dir, available_tools, account, exclude,
     modules, recursive) = readConfigFile.readConfigFile(programDirectory)
    path_to_bam = ""
    default_working_dir = working_dir
    for project in projects:
        # initiate the project parameters based on the project dictionary
        project_path = projects[project]["bam"]
        projectName = project
        # set the output, genmod and frequency db paths
        if not projects[project]["output"]:
            working_dir = default_working_dir
        else:
            working_dir = projects[project]["output"][0]
        if not projects[project]["genmod"]:
            genmod_file = os.path.join(programDirectory, "genmod")
        else:
            genmod_file = projects[project]["genmod"][0]
        if not projects[project]["db"]:
            frequency_db = os.path.join(working_dir, project, "FindSV", "database")
        else:
            frequency_db = projects[project]["db"][0]
        processFilesPath = os.path.join(working_dir, project, "process")
        # create a directory to keep track of the analysed files
        if not (os.path.exists(processFilesPath)):
            os.makedirs(processFilesPath)
        # initiate the processFiles
        processFiles = initiateProcessFile(available_tools, processFilesPath)
        # search for the project's bam files
        bamfiles = detect_bam_files(project_path, projectName, path_to_bam, recursive)
        # function used to find variants
        processFiles = calling.variantCalling(programDirectory, project_path, projectName,
                                              working_dir, path_to_bam, available_tools,
                                              account, modules, bamfiles, exclude,
                                              processFiles, processFilesPath)
        # combine the results of the variant calling
        processFiles = combine.combine(programDirectory, processFiles, processFilesPath,
                                       account, bamfiles)
        # a function used to build databases from vcf files
        processFiles = database.buildDatabase(programDirectory, processFiles,
                                              processFilesPath, account)
        # function that filters the variant files and finds genomic features of the variants
        processFiles = filter.applyFilter(programDirectory, processFiles,
                                          processFilesPath, account, frequency_db)
        # function used to annotate the samples
        processFiles = annotation.annotation(programDirectory, processFiles,
                                             processFilesPath, account, genmod_file)
        # the function used for cleaning the vcf file; this is the final step of the pipeline
        processFiles = cleaning.cleaning(programDirectory, processFiles,
                                         processFilesPath, account)
    return
trait_dump = trait_dump + phonetics[letter]

trait_set = sorted(list(set(trait_dump)))
mapping_letters = []
for ascii in range(ord('a'), ord('a') + len(trait_set)):
    mapping_letters.append(chr(ascii))
trait_map = dict(zip(trait_set, mapping_letters))

combinations = combine(sorted([
    'voice',
    'aspiration',
    'generic',
    # 'type',
    # 'length',
    'depth',
    'tone',
    'complex'
]))

def getTraitCombinations(ch):
    trait_combs = []
    for combination in combinations:
        ts = []
        broken = False
        for metric in combination:
def boundingBox(colorimg, bwimg, i):
    orig = colorimg.copy()
    bwimg = np.clip(bwimg * SENSITIVITY, 0, 255)
    bwimg = bwimg.astype(np.uint8)
    # threshold image
    ret, threshed_img = cv2.threshold(bwimg, 127, 255, cv2.THRESH_BINARY)
    # find contours
    contours, hier = cv2.findContours(threshed_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    rois = []  # x0, y0, x1, y1
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if w + h > MIN_SIZE:
            x0 = int(x - w * BUFFER)
            x1 = int(x + w * (BUFFER + 1))
            y0 = int(y - h * BUFFER)
            y1 = int(y + h * (BUFFER + 1))
            cv2.rectangle(colorimg, (x0, y0), (x1, y1), (0, 0, 255), 2)
            rois.append((x0, y0, x1, y1))
            rect = cv2.minAreaRect(c)
            box = cv2.boxPoints(rect)
            box = np.int0(box)
    combinedRois = combine(rois, orig.shape)
    #print(combinedRois)
    preds = []
    for r in combinedRois:
        if (r[3] - r[1]) * (r[2] - r[0]) > 160:
            cv2.rectangle(colorimg, (r[0], r[1]), (r[2], r[3]), (0, 255, 0), 4)
            try:
                pred, _ = rcnn(orig[r[1]:r[3], r[0]:r[2]])
                #pred, colorimg[r[1]:r[3], r[0]:r[2]] = rcnn(orig[r[1]:r[3], r[0]:r[2]])
                preds.append((i, r[0], r[1], pred))
            except Exception as e:
                print(e)
        else:
            pass
    # dim = (colorimg.shape[1] // 4, colorimg.shape[0] // 4)
    # small = cv2.resize(colorimg, dim, interpolation=cv2.INTER_AREA)
    # cv2.imshow("Moving Objects", small)
    # cv2.waitKey(1)
    while False:  # while True:
        key = cv2.waitKey(1)
        if key == 27:
            break
        elif key == 65:
            cv2.destroyAllWindows()
            exit(0)
    # cv2.destroyAllWindows()
    return preds
def optimize(self, trainSets, classifySets, parameters=defaultOptimizationParameters,
             evaluationClass=None, evaluationArgs={}, combinationsThatTimedOut=None):
    if parameters.has_key("predefined"):
        print >> sys.stderr, "Predefined model, skipping parameter estimation"
        return {"predefined": parameters["predefined"]}
    print >> sys.stderr, "Optimizing parameters"
    parameterNames = parameters.keys()
    parameterNames.sort()
    parameterValues = []
    for parameterName in parameterNames:
        parameterValues.append([])
        for value in parameters[parameterName]:
            parameterValues[-1].append((parameterName, value))
    combinationLists = combine.combine(*parameterValues)
    combinations = []
    for combinationList in combinationLists:
        combinations.append({})
        for value in combinationList:
            combinations[-1][value[0]] = value[1]
    if combinationsThatTimedOut == None:
        combinationsThatTimedOut = []
    bestResult = None
    combinationCount = 1
    if hasattr(self, "tempDir"):
        mainTempDir = self.tempDir
        mainDebugFile = self.debugFile
    for combination in combinations:
        print >> sys.stderr, " Parameters " + str(combinationCount) + "/" + str(len(combinations)) + ":", str(combination),
        skip = False
        for discarded in combinationsThatTimedOut:
            if self._dictIsIdentical(combination, discarded):
                print >> sys.stderr
                print >> sys.stderr, " Discarded before, skipping"
                skip = True
                break
        if skip:
            continue
        # Make copies of examples in case they are modified
        fold = 1
        foldResults = []
        for classifyExamples in classifySets:
            if type(trainSets[0]) == types.StringType:
                trainExamples = trainSets[0]
            else:
                trainExamples = []
                for trainSet in trainSets:
                    if trainSet != classifyExamples:
                        trainExamples.extend(trainSet)
            trainExamplesCopy = trainExamples
            if type(trainExamples) == types.ListType:
                trainExamplesCopy = trainExamples  #ExampleUtils.copyExamples(trainExamples)
            classifyExamplesCopy = classifyExamples
            if type(classifyExamples) == types.ListType:
                classifyExamplesCopy = classifyExamples  #ExampleUtils.copyExamples(classifyExamples)
            if hasattr(self, "tempDir"):
                self.tempDir = mainTempDir + "/parameters" + str(combinationCount) + "/optimization" + str(fold)
                if not os.path.exists(self.tempDir):
                    os.makedirs(self.tempDir)
                self.debugFile = open(self.tempDir + "/debug.txt", "wt")
            timer = Timer()
            trainRV = self.train(trainExamplesCopy, combination)
            print >> sys.stderr, " Time spent:", timer.elapsedTimeToString()
            if trainRV == 0:
                predictions = self.classify(classifyExamplesCopy)
                evaluation = evaluationClass(predictions, **evaluationArgs)
                if len(classifySets) == 1:
                    print >> sys.stderr, evaluation.toStringConcise(" ")
                else:
                    print >> sys.stderr, evaluation.toStringConcise(indent=" ", title="Fold " + str(fold))
                foldResults.append(evaluation)
                if hasattr(self, "tempDir"):
                    evaluation.saveCSV(self.tempDir + "/results.csv")
            else:
                combinationsThatTimedOut.append(combination)
                print >> sys.stderr, " Timed out"
            fold += 1
        if len(foldResults) > 0:
            averageResult = evaluationClass.average(foldResults)
            poolResult = evaluationClass.pool(foldResults)
            if hasattr(self, "tempDir"):
                TableUtils.writeCSV(combination, mainTempDir + "/parameters" + str(combinationCount) + ".csv")
                averageResult.saveCSV(mainTempDir + "/parameters" + str(combinationCount) + "/resultsAverage.csv")
                poolResult.saveCSV(mainTempDir + "/parameters" + str(combinationCount) + "/resultsPooled.csv")
            if len(classifySets) > 1:
                print >> sys.stderr, averageResult.toStringConcise(" Avg: ")
                print >> sys.stderr, poolResult.toStringConcise(" Pool: ")
            if bestResult == None or poolResult.compare(bestResult[1]) > 0:
                #bestResult = (predictions, averageResult, combination)
                bestResult = (None, poolResult, combination)
                # Make sure memory is released, especially important since some of the
                # previous steps copy examples
                bestResult[1].classifications = None
                bestResult[1].predictions = None
        combinationCount += 1
        if hasattr(self, "tempDir"):
            self.debugFile.close()
    if hasattr(self, "tempDir"):
        self.tempDir = mainTempDir
        self.debugFile = mainDebugFile
    return bestResult
def buildExamples(self, sentenceGraph):
    self.makeGSEvents(sentenceGraph)
    self.multiEdgeFeatureBuilder.setFeatureVector(resetCache=True)
    self.triggerFeatureBuilder.initSentence(sentenceGraph)
    examples = []
    exampleIndex = 0
    #undirected = sentenceGraph.dependencyGraph.to_undirected()
    undirected = self.nxMultiDiGraphToUndirected(sentenceGraph.dependencyGraph)
    paths = NX10.all_pairs_shortest_path(undirected, cutoff=999)
    eventTokens = []
    nameTokens = []
    gazCategories = {None: {"neg": -1}}
    for token in sentenceGraph.tokens:
        gazText = self.getGazetteerMatch(token.get("text").lower())
        if gazText != None:
            gazCategories[token] = self.gazetteer[gazText]
        else:
            gazCategories[token] = {"neg": -1}
        if token.get("id") in self.namedEntityHeadTokenIds:
            nameTokens.append(token)
        elif gazText != None:
            eventTokens.append(token)
    allTokens = eventTokens + nameTokens
    #if len(nameTokens) == 0: # there can be no events in this sentence
    #    self.gsEvents = None
    #    return []
    for token in eventTokens:
        potentialRegulation = False
        potentialBinding = False
        for key in gazCategories[token].keys():
            if key in ["Regulation", "Positive_regulation", "Negative_regulation"]:
                potentialRegulation = True
                break
        for key in gazCategories[token].keys():
            if key in ["Binding"]:
                potentialBinding = True
                break
        if potentialRegulation:
            combinations = combine.combine(allTokens, allTokens + [None])
        else:
            combinations = []
            for t2 in nameTokens:
                combinations.append((t2, None))
        if potentialBinding:
            for i in range(len(nameTokens) - 1):
                for j in range(i + 1, len(nameTokens)):
                    combinations.append(((nameTokens[i], nameTokens[j]), None))
        for combination in combinations:
            theme2Binding = False
            if type(combination[0]) == types.ListType or type(combination[0]) == types.TupleType:
                theme2Binding = True
                categoryName, eventIds = self.getGSEventType(sentenceGraph, token,
                                                             combination[0], [combination[1]])
            else:
                categoryName, eventIds = self.getGSEventType(sentenceGraph, token,
                                                             [combination[0]], [combination[1]])
            for id in eventIds:
                self.examplesByEventOrigId[id] += 1
            skip = False
            s = self.skippedByTypeAndReason
            if not s.has_key(categoryName):
                s[categoryName] = {}
            if gazCategories[token].get("neg", -1) > 0.99:
                pass
            if combination[0] == combination[1]:
                pass  #skip = True
            if combination[0] == token or combination[1] == token:
                if theme2Binding or gazCategories[combination[0]].get("Positive_regulation", -1) < 0:
                    skip = True
                    s[categoryName]["duparg"] = s[categoryName].get("duparg", 0) + 1
            if combination[0] == None and combination[1] == None:
                skip = True
                s[categoryName]["noncmb"] = s[categoryName].get("noncmb", 0) + 1
            validCat = self.isValidEvent(paths, sentenceGraph, token, combination)
            if validCat != "OK":
                skip = True
                s[categoryName][validCat] = s[categoryName].get(validCat, 0) + 1
            if len(nameTokens) == 0:
                skip = True
                s[categoryName]["non"] = s[categoryName].get("non", 0) + 1
            if theme2Binding:
                if gazCategories[combination[0][0]].get("neg", -1) > 0.99 or \
                   gazCategories[combination[0][1]].get("neg", -1) > 0.99:
                    skip = True
                    s[categoryName]["gazarg"] = s[categoryName].get("gazarg", 0) + 1
            else:
                if gazCategories[combination[0]].get("neg", -1) > 0.99 or \
                   gazCategories[combination[1]].get("neg", -1) > 0.99:
                    skip = True
                    s[categoryName]["gazarg"] = s[categoryName].get("gazarg", 0) + 1
            if (skip and self.negFrac == None) or (skip and self.negFrac != None and categoryName == "neg"):
                self.skippedByType[categoryName] = self.skippedByType.get(categoryName, 0) + 1
            else:
                if self.negFrac == None or categoryName != "neg" or \
                   (categoryName == "neg" and self.negRand.random() < self.negFrac):
                    self.builtByType[categoryName] = self.builtByType.get(categoryName, 0) + 1
                    if theme2Binding:
                        newExample = self.buildExample(exampleIndex, sentenceGraph, paths,
                                                       token, combination[0], [combination[1]])
                    else:
                        newExample = self.buildExample(exampleIndex, sentenceGraph, paths,
                                                       token, [combination[0]], [combination[1]])
                    if len(eventIds) > 0:
                        newExample[3]["numEv"] = str(len(eventIds))
                    examples.append(newExample)
                    exampleIndex += 1
    self.gsEvents = None
    return examples
def feature(file, a=0):
    if a == 0:
        colorSave.colorSave(file)
        texture.texture(file)
    if a == 1:
        combine.combine(file)
def buildExamples(self, sentenceGraph):
    self.makeGSEvents(sentenceGraph)
    self.multiEdgeFeatureBuilder.setFeatureVector(resetCache=True)
    self.triggerFeatureBuilder.initSentence(sentenceGraph)

    examples = []
    exampleIndex = 0

    #undirected = sentenceGraph.dependencyGraph.to_undirected()
    undirected = self.nxMultiDiGraphToUndirected(sentenceGraph.dependencyGraph)
    paths = NX10.all_pairs_shortest_path(undirected, cutoff=999)

    eventTokens = []
    nameTokens = []
    gazCategories = {None: {"neg": -1}}
    #stems = {}
    for token in sentenceGraph.tokens:
        gazText = self.getGazetteerMatch(token.get("text").lower())
        if gazText != None:
            gazCategories[token] = self.gazetteer[gazText]
        else:
            gazCategories[token] = {"neg": -1}
        if token.get("id") in self.namedEntityHeadTokenIds:
            nameTokens.append(token)
        elif gazText != None:
            eventTokens.append(token)
    allTokens = eventTokens + nameTokens

    #if len(nameTokens) == 0: # there can be no events in this sentence
    #    self.gsEvents = None
    #    return []

    for token in eventTokens:
        #gazCategories = self.gazetteer[token.get("text").lower()]
        #print token.get("text").lower(), gazCategories

        #multiargument = False
        potentialRegulation = False
        potentialBinding = False
        for key in gazCategories[token].keys():
            if key in ["Regulation", "Positive_regulation", "Negative_regulation"]:
                #multiargument = True
                potentialRegulation = True
                break
        for key in gazCategories[token].keys():
            if key in ["Binding"]:
                #multiargument = True
                potentialBinding = True
                break

        if potentialRegulation:
            combinations = combine.combine(allTokens, allTokens + [None])
        else:
            combinations = []
            for t2 in nameTokens: #allTokens:
                combinations.append((t2, None))
        if potentialBinding:
            for i in range(len(nameTokens) - 1):
                for j in range(i + 1, len(nameTokens)):
                    combinations.append(((nameTokens[i], nameTokens[j]), None))

        for combination in combinations:
            theme2Binding = False
            if type(combination[0]) == types.ListType or type(combination[0]) == types.TupleType:
                theme2Binding = True
                categoryName, eventIds = self.getGSEventType(sentenceGraph, token, combination[0], [combination[1]])
            else:
                categoryName, eventIds = self.getGSEventType(sentenceGraph, token, [combination[0]], [combination[1]])

            for id in eventIds:
                self.examplesByEventOrigId[id] += 1

            skip = False
            s = self.skippedByTypeAndReason
            if not s.has_key(categoryName):
                s[categoryName] = {}
            if gazCategories[token].get("neg", -1) > 0.99:
                pass
            if combination[0] == combination[1]:
                pass #skip = True
            if combination[0] == token or combination[1] == token:
                if theme2Binding or gazCategories[combination[0]].get("Positive_regulation", -1) < 0:
                    skip = True
                    s[categoryName]["duparg"] = s[categoryName].get("duparg", 0) + 1
            if combination[0] == None and combination[1] == None:
                skip = True
                s[categoryName]["noncmb"] = s[categoryName].get("noncmb", 0) + 1

            validCat = self.isValidEvent(paths, sentenceGraph, token, combination)
            if validCat != "OK": #not self.isValidEvent(paths, sentenceGraph, token, combination):
                skip = True
                #s[categoryName]["valid"] = s[categoryName].get("valid", 0) + 1
                s[categoryName][validCat] = s[categoryName].get(validCat, 0) + 1

            if len(nameTokens) == 0:
                skip = True
                s[categoryName]["non"] = s[categoryName].get("non", 0) + 1

            if theme2Binding:
                if gazCategories[combination[0][0]].get("neg", -1) > 0.99 or gazCategories[combination[0][1]].get("neg", -1) > 0.99:
                    skip = True
                    s[categoryName]["gazarg"] = s[categoryName].get("gazarg", 0) + 1
            else:
                if gazCategories[combination[0]].get("neg", -1) > 0.99 or gazCategories[combination[1]].get("neg", -1) > 0.99:
                    skip = True
                    s[categoryName]["gazarg"] = s[categoryName].get("gazarg", 0) + 1

            if (skip and self.negFrac == None) or (skip and self.negFrac != None and categoryName == "neg"):
                self.skippedByType[categoryName] = self.skippedByType.get(categoryName, 0) + 1
            else:
                if self.negFrac == None or categoryName != "neg" or (categoryName == "neg" and self.negRand.random() < self.negFrac):
                    self.builtByType[categoryName] = self.builtByType.get(categoryName, 0) + 1
                    if theme2Binding:
                        newExample = self.buildExample(exampleIndex, sentenceGraph, paths, token, combination[0], [combination[1]])
                    else:
                        newExample = self.buildExample(exampleIndex, sentenceGraph, paths, token, [combination[0]], [combination[1]])
                    if len(eventIds) > 0:
                        newExample[3]["numEv"] = str(len(eventIds))
                    examples.append(newExample)
                    exampleIndex += 1

    self.gsEvents = None
    return examples
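# The snippet above (and several below) call combine.combine(listA, listB) to
# enumerate candidate (Theme, Cause) argument pairs. The combine module itself
# is not shown here; judging purely from how its result is iterated, it appears
# to yield every ordered pair drawn from the two input lists. A minimal sketch
# of that assumed behaviour (combine_sketch is a hypothetical stand-in):
from itertools import product

def combine_sketch(list_a, list_b):
    """Hypothetical stand-in for combine.combine: all ordered pairs."""
    return list(product(list_a, list_b))

# e.g. combine_sketch(allTokens, allTokens + [None]) would yield pairs whose
# second element may be None, matching how combination[1] is treated above.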
def buildExamples(self, sentenceGraph):
    self.makeGSEvents(sentenceGraph)

    eventNodes = []
    nameNodes = []
    for entity in sentenceGraph.entities:
        if entity.get("type") == "neg":
            continue
        if entity.get("isName") == "True":
            nameNodes.append(entity)
        else:
            eventNodes.append(entity)
    allNodes = eventNodes + nameNodes

    examples = []
    exampleIndex = 0

    undirected = sentenceGraph.dependencyGraph.to_undirected()
    paths = NX.all_pairs_shortest_path(undirected, cutoff=999)

    for eventNode in eventNodes:
        eventType = eventNode.get("type")
        if eventType in ["Gene_expression", "Transcription", "Protein_catabolism", "Localization", "Phosphorylation"]:
            for nameNode in nameNodes:
                if self.isPotentialGeniaInteraction(eventNode, nameNode):
                    examples.append(self.buildExample(exampleIndex, sentenceGraph, paths, eventNode, nameNode))
                    exampleIndex += 1
        elif eventType in ["Regulation", "Positive_regulation", "Negative_regulation"]:
            combinations = combine.combine(allNodes + [None], allNodes + [None])
            for combination in combinations:
                if combination[0] == combination[1]:
                    continue
                if combination[0] == eventNode or combination[1] == eventNode:
                    continue
                if combination[0] != None and not self.isPotentialGeniaInteraction(eventNode, combination[0]):
                    continue
                if combination[1] != None and not self.isPotentialGeniaInteraction(eventNode, combination[1]):
                    continue
                examples.append(self.buildExample(exampleIndex, sentenceGraph, paths, eventNode, combination[0], combination[1]))
                exampleIndex += 1
        elif eventType in ["Binding"]:
            continue
        else:
            assert False, eventType

    self.gsEvents = None
    return examples
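# The regulation branch above enumerates every ordered (Theme, Cause) pair
# over allNodes + [None] and then filters it. The same pattern can be read in
# isolation as a small generator; this is only a restatement of the logic
# above, with is_valid_pair standing in for isPotentialGeniaInteraction:
from itertools import product

def regulation_argument_pairs(event_node, all_nodes, is_valid_pair):
    """Yield (theme, cause) candidates for a regulation trigger.

    None in a slot means that argument is left empty; is_valid_pair is a
    hypothetical stand-in for isPotentialGeniaInteraction.
    """
    for theme, cause in product(all_nodes + [None], repeat=2):
        if theme == cause:                 # identical arguments (incl. None/None)
            continue
        if event_node in (theme, cause):   # the trigger cannot be its own argument
            continue
        if theme is not None and not is_valid_pair(event_node, theme):
            continue
        if cause is not None and not is_valid_pair(event_node, cause):
            continue
        yield theme, cause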
import sys
import time

import sharkspider as ss
import combine as cb

tm_suf = time.strftime("%Y%m%d-%H%M%S", time.localtime())
f_name = "uniq-" + tm_suf + ".csv"
dblist = []

if ((len(sys.argv) == 3) and sys.argv[1] == "test"
        and ('https://movie.douban.com/subject/' in sys.argv[2])):
    ss.crawl_url_and_print(sys.argv[2])
    exit()

if (len(sys.argv) == 1):
    dblist = cb.combine(['all.csv'], f_name, 0)
    gs0 = ss.g_set_cls()
    gs0.g_sall = set(dblist)
    ss.crawl_weekly_top10(gs0)
    ss.init_file_navigation(gs0.file_out)
    ss.crawl_loop(gs0)
    exit()
elif (len(sys.argv) == 2):
    dblist = cb.combine(sys.argv[1:], f_name, 0)
    gs0 = ss.g_set_cls()
    gs0.g_s0 = {'https://movie.douban.com/subject/26887055/',
                'https://movie.douban.com/subject/1295053/',
                'https://movie.douban.com/subject/27052274/',
                'https://movie.douban.com/subject/26934285/',
                'https://movie.douban.com/subject/26754101/',
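# In the crawler script above, cb.combine takes a list of CSV files, an output
# file name, and a flag, and returns a list that is immediately wrapped in
# set() to de-duplicate crawled URLs. Its real implementation is not shown;
# a minimal sketch under those assumptions (the third argument's meaning is
# unknown and is ignored here):
def combine_csvs_sketch(in_files, out_file, _flag=0):
    """Hypothetical: merge rows from in_files into out_file, return the rows."""
    rows = []
    for path in in_files:
        with open(path) as f:
            rows.extend(line.strip() for line in f if line.strip())
    with open(out_file, 'w') as f:
        f.write('\n'.join(rows) + '\n')
    return rows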
def ltsm_gen_v2(net, seq_len, file_name, sampling_idx=0, note_pos=0, n_steps=100,
                hidden_size=178, num_layers=1, time_step=0.05, changing_note=False,
                note_stuck=False, remove_extra_rests=False):
    """
    Uses the trained LSTM to generate new notes and saves the output to a MIDI file
    The difference between this and the previous one is that we only use one note as input
    And then keep generating notes until we have a sequence of notes of length = seq_len
    Once we do, we start appending the generated notes to the final output
    :param net: Trained LSTM
    :param seq_len: Length of input sequence
    :param file_name: Name to be given to the generated MIDI file
    :param sampling_idx: File to get the input note from, out of the pieces used to train the LSTM
    :param note_pos: Position of the sampled input note in the source piece, default to the first note
    :param n_steps: Number of vectors to generate
    :param hidden_size: Hidden size of the trained LSTM
    :param num_layers: Number of layers of the trained LSTM
    :param time_step: Vector duration. Should be the same as the one on get_right_hand()
    :param changing_note: To sample from different sources at some point of the generation and add this
                          new note to the sequence. This is done in case the generation gets stuck
                          repeating a particular sequence over and over.
    :param note_stuck: To change the note if the generation gets stuck playing the same note over and over.
    :param remove_extra_rests: If the generation outputs a lot of rests in between, use this
    :return: None. Just saves the generated music as a .mid file
    """
    notes = []  # Will contain a sequence of the predicted notes
    x = notes_encoded[sampling_idx][note_pos:note_pos + 1, :, :]  # First note of the piece
    notes.append(x.cpu().numpy())  # Saves the first note
    h_state = torch.zeros(num_layers, 1, hidden_size).float().cuda()
    c_state = torch.zeros(num_layers, 1, hidden_size).float().cuda()
    print_first = True
    change_note = False

    for _ in range(n_steps):
        chosen = False  # To account for when no dimension's probability is bigger than 0.9
        y_pred, h_c_state = net(x, (h_state, c_state))
        h_state, c_state = h_c_state[0].data, h_c_state[1].data
        y_pred = y_pred.data
        y_pred = y_pred[-1]  # We only care about the last predicted note (next note after last note of input sequence)
        choose = torch.zeros((1, 1, 178))  # Converts the probabilities to the actual note vector

        y_pred_left = y_pred[:, :89]
        for idx in range(89):
            if y_pred_left[:, idx] > 0.9:
                choose[:, :, idx] = 1
                chosen = True
        if y_pred_left[:, -1] >= 0.7:  # We add a hold condition, in case the probability
            choose[:, :, 88] = 1       # of having a hold is close to the one of having the pitch
        if not chosen:
            if print_first:
                print("\nPrinting out the maximum prob of all notes for a time step",
                      "when this maximum prob is less than 0.9")
                print_first = False
            pred_note_idx = np.argmax(y_pred_left.cpu())
            choose[:, :, pred_note_idx] = 1
            if pred_note_idx != 87:  # No holds for rests
                if y_pred_left[:, pred_note_idx] - y_pred_left[:, -1] <= 0.2:  # Hold condition
                    choose[:, :, 88] = 1
            # Maximum probability out of all components
            print(_, "left", y_pred_left[:, np.argmax(y_pred_left.cpu())])

        y_pred_right = y_pred[:, 89:]
        for idx in range(89):
            if y_pred_right[:, idx] > 0.9:
                choose[:, :, idx + 89] = 1
                chosen = True
        if y_pred_right[:, -1] >= 0.7:
            choose[:, :, -1] = 1
        if not chosen:
            if print_first:
                print("\nPrinting out the maximum prob of all notes for a time step",
                      "when this maximum prob is less than 0.9")
                print_first = False
            pred_note_idx = np.argmax(y_pred_right.cpu())
            choose[:, :, pred_note_idx + 89] = 1
            if pred_note_idx != 87:  # No holds for rests
                if y_pred_right[:, pred_note_idx] - y_pred_right[:, -1] <= 0.2:  # Hold condition
                    choose[:, :, -1] = 1
            # Maximum probability out of all components
            print(_, "right", y_pred_right[:, np.argmax(y_pred_right.cpu())])

        # If the number of input sequences is shorter than the expected one
        if x.shape[0] < seq_len:  # We keep adding the predicted notes to this input
            x_new = torch.empty((x.shape[0] + 1, x.shape[1], x.shape[2]))
            for i in range(x_new.shape[0] - 1):
                x_new[i, :, :] = x[i, :, :]
            x_new[-1, :, :] = y_pred
            x = x_new.cuda()
            notes.append(choose)
        else:                                  # If we already have enough sequences
            x_new = torch.empty(x.shape)       # Removes the first note
            for idx, nt in enumerate(x[1:]):   # of the current sequence
                x_new[idx] = nt                # And appends the predicted note to the
            x_new[-1] = choose                 # input of sequences
            x = x_new.cuda()
            notes.append(choose)

        # Condition so that the generation does not
        # get stuck on a particular sequence
        if changing_note:
            if _ % seq_len == 0:
                if sampling_idx >= len(notes_encoded):
                    sampling_idx = 0
                    change_note = True
                st = randint(1, 100)
                if change_note:
                    x_new[-1] = notes_encoded[sampling_idx][st, :, :]
                    change_note = False
                else:
                    x_new[-1] = notes_encoded[sampling_idx][0, :, :]
                sampling_idx += 1
                x = x_new.cuda()

        # Condition so that the generation does not
        # get stuck on a particular note
        if _ > 8 and note_stuck:
            if (notes[-1][:, :, 89:] == notes[-2][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                if (notes[-1][:, :, 89:] == notes[-3][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                    if (notes[-1][:, :, 89:] == notes[-4][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                        if (notes[-1][:, :, 89:] == notes[-5][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                            if (notes[-1][:, :, 89:] == notes[-6][:, :, 89:]).sum(2)[0][0].numpy() in [88, 89]:
                                for m in range(5):
                                    notes.pop(-1)
                                if sampling_idx >= len(notes_encoded):
                                    sampling_idx = 0
                                x_new[-1] = notes_encoded[sampling_idx][randint(1, 100), :, :]
                                x = x_new.cuda()
                                sampling_idx += 1

    # Gets the notes into the correct NumPy array shape
    gen_notes = np.empty((len(notes) - seq_len + 1, 178))  # Doesn't use the first predicted notes
    for idx, nt in enumerate(notes[seq_len - 1:]):         # Because at first this will be inaccurate
        gen_notes[idx] = nt[0]

    # Decodes the generated music
    gen_midi_left = decode(get_tempo_dim_back(gen_notes[:, :89], 74), time_step=time_step)
    # Gets rid of too many rests
    if remove_extra_rests:
        stream_left = ms.stream.Stream()
        for idx, nt in enumerate(gen_midi_left):
            if type(nt) == ms.note.Rest and idx < len(gen_midi_left) - 5:
                if nt.duration.quarterLength > 4 * time_step:
                    print("Removing rest")
                    continue
                if type(gen_midi_left[idx + 4]) == ms.note.Rest:
                    print("Removing rest")
                    continue
                stream_left.append(nt)
            else:
                stream_left.append(nt)
    else:
        stream_left = gen_midi_left

    # Same thing for right hand
    gen_midi_right = decode(get_tempo_dim_back(gen_notes[:, 89:], 74), time_step=time_step)
    if remove_extra_rests:
        stream_right = ms.stream.Stream()
        for idx, nt in enumerate(gen_midi_right):
            if type(nt) == ms.note.Rest and idx < len(gen_midi_right) - 5:
                if nt.duration.quarterLength > 4 * time_step:
                    print("Removing rest")
                    continue
                if type(gen_midi_right[idx + 4]) == ms.note.Rest:
                    print("Removing rest")
                    continue
                stream_right.append(nt)
            else:
                stream_right.append(nt)
    else:
        stream_right = gen_midi_right

    # Saves both hands combined as a MIDI file
    combine(stream_left, stream_right, file_name + ".mid")
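# The generation loop above turns each hand's 89-dimensional probability
# vector (88 pitch/rest slots plus a hold flag at index 88) into a binary note
# vector: any slot above 0.9 is switched on, the hold flag also fires at a
# lower 0.7 threshold, and if nothing clears 0.9 the argmax slot is taken,
# adding the hold when its probability is within 0.2 of the chosen pitch.
# The same rule in isolation, as a standalone sketch:
import numpy as np

def threshold_hand(probs, on=0.9, hold_on=0.7, hold_margin=0.2):
    """probs: length-89 array (pitch/rest slots 0-87, hold flag at 88)."""
    out = np.zeros(89)
    chosen = False
    for idx in range(89):
        if probs[idx] > on:        # confident slots are switched on
            out[idx] = 1
            chosen = True
    if probs[88] >= hold_on:       # the hold flag uses a lower threshold
        out[88] = 1
    if not chosen:                 # fallback: take the most probable slot
        best = int(np.argmax(probs))
        out[best] = 1
        if best != 87 and probs[best] - probs[88] <= hold_margin:
            out[88] = 1            # hold, unless the slot is the rest (87)
    return out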
from readCSVs import readStrain
from readCSVs import readVoltage
from cycles import splitCycles
from combine import combine
from filter import butterWorth
from resample import resample

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig

voltageData = readVoltage("voltage.csv")
strainData = readStrain("force.csv", 0)

combined = combine(voltageData, strainData, toAlign=0, flip=False)
# combined['Voltage'] = butterWorth(combined['Voltage'], combined['Time V'], cutOff=0)

voltageResults, strainResults = splitCycles(combined, byPeak=True,
                                            strainProminence=.001,
                                            voltageProminence=.1)

strainCycles = strainResults['cycles']
strainTimes = strainResults['times']
voltageCycles = voltageResults['cycles']
voltageTimes = voltageResults['times']

newVoltageCycles = []
newStrainCycles = []
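# The script above stops before newVoltageCycles / newStrainCycles are filled.
# As a quick sanity check of the splitCycles output, the first extracted cycle
# of each signal could be plotted with the matplotlib import already present.
# This assumes each entry of 'cycles' and 'times' is a plain sequence of
# samples; the real structure returned by splitCycles may differ:
if strainCycles and voltageCycles:
    plt.plot(strainTimes[0], strainCycles[0], label="strain, cycle 0")
    plt.plot(voltageTimes[0], voltageCycles[0], label="voltage, cycle 0")
    plt.xlabel("Time (s)")
    plt.ylabel("Signal")
    plt.legend()
    plt.show()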