def torcURL(address, filename):
    print('cURL on ' + address + ' to ' + filename + '\n')
    output = io.BytesIO()
    torcURL = pycurl.Curl()
    torcURL.setopt(pycurl.URL, address)
    torcURL.setopt(pycurl.PROXY, '127.0.0.1')
    torcURL.setopt(pycurl.PROXYPORT, SOCKS_PORT)
    torcURL.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME)
    torcURL.setopt(pycurl.WRITEFUNCTION, output.write)
    # Cosmetic progress bar; pycurl reports no progress for this transfer.
    bar = Bar('Running', max=100)
    for i in range(100):
        bar.next()
    bar.finish()
    try:
        torcURL.perform()
        # Write the response body before returning; getvalue() is already bytes.
        with open(filename, 'wb') as fp:
            fp.write(output.getvalue().strip())
        return output.getvalue()
    except KeyboardInterrupt:
        raise KeyboardInterrupt
    except pycurl.error as e:
        return "Unable to reach %s (%s)" % (address, e)
    except Exception as e:
        UnknownError()
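# Every snippet in this file follows the same progress-reporting pattern from
# the `progress` package: construct Bar(message, max=N), call next() once per
# unit of work (or next(n) for a batch), and finish() when done. A minimal,
# self-contained sketch of that pattern (the work loop is a stand-in):
from progress.bar import Bar

def progress_demo(items):
    bar = Bar('Processing', max=len(items),
              suffix='%(index)d/%(max)d | %(percent)d%% | ETA %(eta)ds')
    for item in items:
        # ... do per-item work here ...
        bar.next()
    bar.finish()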
def evaluate(train_file_path, test_num, tagger, output_file_path):
    sents = parse_train_data(train_file_path)
    test_start = len(sents) - test_num - 1
    test_data = sents[test_start:len(sents)-1]
    train_data = sents[0:test_start+1]
    print 'Training with {0} sentences'.format(len(train_data))
    tagger.train(train_data)
    output = open(output_file_path, 'w')
    correct = 0
    total = 0
    bar = Bar('Testing with {0} sentences'.format(len(test_data)), max=len(test_data))
    for s in test_data:
        tagged = tagger.tag(remove_tags(s))
        # evaluate
        correct += evaluate_sentence(s, tagged)
        total += len(tagged)
        # write
        words = []
        for t in tagged:
            words.append(t[0] + '_' + t[1])
        output.write('\t'.join(words) + '\n')
        bar.next()
    bar.finish()
    output.close()
    return correct / float(total) * 100
def pipeline_pos(titles, descriptions, tags):
    def preprocess(inpt):
        # Placeholder: stemming/stopword removal could go here.
        return inpt

    # Create feature vectors of context and only keep images WITH context
    bar = Bar('Extracting features...', max=len(titles))
    pos_collection = []
    for i in xrange(len(titles)):
        context = []
        # Preprocess the title...
        title = preprocess(titles[i].split(' '))
        if title:
            context.append(title)
        # ... each sentence of the description ...
        for desc in sent_tokenize(descriptions[i]):
            desc = preprocess(desc.split(' '))
            if desc:
                context.append(desc)
        # ... and the tags
        ts = preprocess(tags[i])
        if ts:
            context.append(ts)
        pos = nltk.pos_tag_sents(context)
        pos = list(itertools.chain(*pos))
        pos_collection.append(pos)
        bar.next()
    bar.finish()
    return pos_collection
def draw_poster(poster_text, textsize, inp):
    '''split out and highlight the words'''
    top_pad = 0.25
    left_pad = 9
    font = ImageFont.truetype("NotCourierSans.otf", textsize)  # This font needs to be monospaced!
    im = Image.new("RGBA", (9933, 14043), "black")  # A1 size
    draw = ImageDraw.Draw(im)  # Set up sheet to draw on
    print('Drawing text')
    bar = Bar('Processing', max=len(poster_text))  # Progress bar to entertain me while I watch this run
    for i, text in enumerate(poster_text):
        if "1969-07-21 02:56:48 CDR" in text:
            quote = "1969-07-21 02:56:48 CDR (TRANQ) That's one small step for man, one giant leap for mankind."
            text = text.split(quote)
            width_p1, h1 = draw.textsize(text[0], font=font)
            width_quote, h2 = draw.textsize(quote, font=font)
            draw.text((left_pad, int((i + top_pad) * textsize)), text[0],
                      font=font, fill=(255, 255, 255, 255))  # All text padded left_pad pixels from the left
            draw.text((left_pad + width_p1, int((i + top_pad) * textsize)), quote,
                      font=font, fill=(255, 0, 0, 255))
            draw.text((left_pad + width_p1 + width_quote, int((i + top_pad) * textsize)), text[1],
                      font=font, fill=(255, 255, 255, 255))
            bar.next()
        else:
            draw.text((left_pad, int((i + top_pad) * textsize)), text,
                      font=font, fill=(255, 255, 255, 255))
            bar.next()
    bar.finish()
    print('Saving image!')
    if inp == 'y':
        bleedx, bleedy = 10004, 14114
        bufferx, buffery = int((bleedx - 9933) / 2), int((bleedy - 14043) / 2)
        bleed_im = Image.new("RGBA", (bleedx, bleedy), "black")  # Bleed area for printing
        bleed_im.paste(im, (bufferx, buffery))
        bleed_im.save("output.png", "PNG")
    else:
        im.save("output.png", "PNG")
def pipeline_onehot(titles, descriptions, tags):
    # Create feature vectors of context and only keep images WITH context
    docs = []
    for i in xrange(len(titles)):
        docs.append(u'{} {} {}'.format(titles[i], descriptions[i], ' '.join(tags[i])))
    vectorizer = CountVectorizer(min_df=5)
    X = vectorizer.fit_transform(docs)
    bar = Bar('Extracting features...', max=len(docs))
    idx_docs = []
    for idoc, doc in enumerate(docs):
        # Shift vocabulary indices by one so that 0 can serve as padding below.
        idxs = X[idoc].nonzero()[1] + 1
        idxs = idxs.tolist()
        idx_docs.append(idxs)
        bar.next()
    bar.finish()
    max_len = 500
    bar = Bar('Merging into one matrix...', max=len(idx_docs))
    for i, idx_doc in enumerate(idx_docs):
        features = np.zeros((1, max_len), np.int64)
        vec = np.array(idx_doc[:max_len])
        features[0, :vec.shape[0]] = vec
        if i == 0:
            feat_flatten = csr_matrix(features.flatten())
        else:
            feat_flatten = vstack([feat_flatten, csr_matrix(features.flatten())])
        bar.next()
    bar.finish()
    return feat_flatten, vectorizer
def saveAverageImage(kitti_base, pos_labels, shape, fname, avg_num=None):
    num_images = float(len(pos_labels))
    # Check for None before taking the min; min(avg_num, n) is invalid for None.
    if avg_num is None:
        avg_num = num_images
    else:
        avg_num = min(avg_num, num_images)
    avg_img = np.zeros(shape, np.float32)
    progressbar = ProgressBar('Averaging ' + fname, max=len(pos_labels))
    num = 0
    for label in pos_labels:
        if num >= avg_num:
            break
        num += 1
        progressbar.next()
        sample = getCroppedSampleFromLabel(kitti_base, label)
        resized = resizeSample(sample, shape, label)
        resized = auto_canny(resized)
        resized = np.float32(resized)
        avg_img = cv2.add(avg_img, resized / float(avg_num))
    progressbar.finish()
    cv2.imwrite(fname, avg_img)
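# auto_canny is not defined in this file; a common median-based implementation
# (assumed here, and also used by average_image below) picks the Canny
# thresholds automatically from the image median:
import numpy as np
import cv2

def auto_canny(image, sigma=0.33):
    # Thresholds straddle the median intensity by +/- sigma.
    v = np.median(image)
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    return cv2.Canny(image, lower, upper)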
def get_list(filename):
    """
    Creates an array of objects out of input training file
    ==================================
    Returns:
      * array of objects where each object corresponds to a document
    ==================================
    """
    fo = open(filename)
    lines = fo.readlines()
    fo.close()
    total = len(lines)
    obj_arr = []
    vec_arr = []
    bar = Bar("Processing", max=total,
              suffix='%(percent)d%% | %(index)d of %(max)d | %(eta)d seconds remaining.')
    for each in lines:
        send_obj = files(each.split('\n')[0].split('\t'))
        send_obj.set_word_count(5)
        send_obj.set_pos_features()
        send_obj.set_punctuation_features()
        send_obj.set_vectors()
        obj_arr.append(send_obj)
        bar.next()
    bar.finish()
    return obj_arr
def main():
    infile = raw_input('Input file name: ')
    if os.path.exists(infile):
        print '\n[!] Loading PCAP file. Please wait, it might take a while...'
        ips = sorted(set(p[IP].src for p in PcapReader(infile) if IP in p))
        total = len(ips)
        print '[!] Total number of IP addresses: %d\n' % total
        bar = Bar('Processing', max=total)
        for ip in ips:
            get_data(ip)
            bar.next()
        bar.finish()
        headers = ['IP', 'OWNER', 'COUNTRY', 'ORGANIZATION', 'SERVER', 'DESCRIPTION']
        print '\n\n'
        print tabulate(table, headers, tablefmt='grid')
        if exceptions:
            print '\nExceptions:'
            for e in exceptions:
                print '*\t%s' % e
        print '\n\n[!] Done.\n\n'
    else:
        print '[!] Cannot find file "%s"\n\tExiting...' % infile
        sys.exit()
def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    This function reads a file with tweet IDs and then loads them through the
    API into the database. Prepare to wait quite a bit, depending on the size
    of the dataset.
    """
    ids_to_fetch = set()
    for line in open(idlist_file, "r"):
        # Remove the newline character through .strip() and convert to int,
        # since that's what the database uses.
        ids_to_fetch.add(int(line.strip()))
    # Find a list of Tweets that we already have.
    ids_in_db = set(t.id for t in database.Tweet.select(database.Tweet.id))
    # Sets have an efficient .difference() method that returns IDs only present
    # in the first set, but not in the second.
    ids_to_fetch = ids_to_fetch.difference(ids_in_db)
    logging.warning(
        "\nLoaded a list of {0} tweet IDs to hydrate".format(len(ids_to_fetch)))
    # Set up a progress bar.
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
def gradient_descent(X, Y, iter, alpha):
    (rows, cols) = X.shape
    Xt = X.T
    w = numpy.zeros((len(Xt), 1))
    print w.shape
    bar = Bar('iterations', max=iter)
    for i in range(0, iter):
        pw = w
        # Least-squares gradient: d/dw ||Xw - Y||^2 = 2*Xt*X*w - 2*Xt*Y
        # (the second term also needs the factor of 2).
        dw = 2 * matrix.dot(matrix.dot(Xt, X), w) - 2 * matrix.dot(Xt, Y)
        w = w - alpha * dw / rows
        diff = numpy.absolute(w - pw).sum()
        print "Diff is %f " % diff
        if diff < 0.000001:
            bar.finish()
            return w
        bar.next()
    bar.finish()
    return w
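# Illustrative sanity check (variable names are made up): on a small synthetic
# problem the iterate should approach the closed-form least-squares solution.
import numpy

X_demo = numpy.random.rand(100, 3)
Y_demo = X_demo.dot(numpy.array([[1.0], [2.0], [3.0]]))
w_gd = gradient_descent(X_demo, Y_demo, iter=10000, alpha=0.1)
w_ls, _, _, _ = numpy.linalg.lstsq(X_demo, Y_demo)
print numpy.allclose(w_gd, w_ls, atol=1e-3)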
def main(args):
    d = json.load(open(args.c, 'r'))
    np.random.seed(1234)
    im2id = {}
    id2cap = {}
    print 'img 2 id....'
    for im in d['images']:
        im2id[im['file_name']] = im['id']
    bar = Bar('id 2 cap...', max=len(d['annotations']))
    for ann in d['annotations']:
        cap = nltk.word_tokenize(ann['caption'])
        cap = ' '.join(cap).lower()
        if ann['image_id'] in id2cap:
            id2cap[ann['image_id']].append(cap)
        else:
            id2cap[ann['image_id']] = [cap]
        bar.next()
    bar.finish()
    with open(args.s, 'r') as f:
        images = f.read().split()
    refs = []
    for im in images:
        refs.append('<>'.join(id2cap[im2id[im]]))
    with open(args.saveto, 'w') as f:
        print >>f, '\n'.join(refs)
class Closest(object):
    data = pd.DataFrame()
    cols = []
    bar = None

    def __init__(self, df, cols, size):
        self.data = df
        self.cols = cols
        self.bar = Bar(message="Compressing Time", max=size,
                       suffix="%(percent)d%% (%(index)d/%(max)d) ETA %(eta_td)s")

    def __call__(self, row):
        self.bar.next()
        found = self.data[(self.data.restaurant_id == row.restaurant_id)
                          & (self.data.date <= row.date)]
        if found.shape[0] == 0:
            # FIXME Do something smarter than averaging?
            found = self.data[(self.data.restaurant_id == row.restaurant_id)][self.cols].mean()
        else:
            found = found[self.cols].sum()
        # FIXME Sometimes NaNs appear if I am missing the restaurant ID. What to do?
        found.fillna(0, inplace=True)
        row[self.cols] = found
        return row

    def __del__(self):
        self.bar.finish()
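# Intended usage (a sketch; the frames and column names are assumptions): the
# instance is applied row-wise, so the progress bar advances once per row.
closest = Closest(history_df, cols=['sales', 'visits'], size=len(orders_df))
orders_df = orders_df.apply(closest, axis=1)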
def read_and_gen(lyric_path, file_path):
    """
    read file and generate mp3 sound file
    :param file_path:
    :return:
    """
    # Remove the original file before adding new content to it.
    if os.path.exists(file_path):
        os.remove(file_path)
    with open(lyric_path, encoding="utf-8") as f:
        lines = f.readlines()
    bar = Bar('Processing', max=len(lines))
    for line in lines:
        if is_alphabet(line[0]):
            # Line should be spoken in English.
            speak = gtts_extends(line, lang='en')
            speak.sequence_save(file_path)
        if is_chinese(line[0]):
            speak = gtts_extends(line, lang='zh')
            speak.sequence_save(file_path)
        bar.next()
    bar.finish()
    print("transform success!")
def tokenize_proteins(data, msg='Processing proteins'):
    """Distribute all poses into either decoys list or actives OrderedDict.
    Poses placed into the actives OrderedDict are further organized into
    sublists for each ligand.

    args:
        @data list of string lines containing pose data
        @msg string message to display in progress bar
    returns:
        @actives OrderedDict of all active poses gathered from data
        @decoys list of all decoy poses gathered from data
    """
    actives = OrderedDict()
    decoys = list()
    bar = Bar(msg, max=len(data))
    for i, line in enumerate(data):
        bar.next()
        pose = posedict(line)  # Token -> List
        if pose['label'] == 1:  # Pose -> Actives
            pose['id'] = pose['ligand'] + '-' + str(i)
            actives.setdefault(pose['ligand'], []).append(pose)
        else:  # Pose -> Decoys
            decoys.append(pose)
    bar.finish()
    print ""
    return actives, decoys
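# posedict is not defined in this file; a minimal sketch of what it is assumed
# to do: parse one tab-separated pose line into a dict with at least the
# 'label' (int) and 'ligand' (str) fields used above. The field order here is
# a guess, not the actual format.
def posedict(line):
    fields = line.rstrip('\n').split('\t')
    return {
        'ligand': fields[0],
        'label': int(fields[1]),
        'raw': fields,
    }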
def average_image(pos_region_generator, shape, avg_num=None):
    pos_regions = list(pos_region_generator)
    num_images = float(len(pos_regions))
    if avg_num is None:
        avg_num = num_images
    else:
        avg_num = min(avg_num, num_images)
    window_dims = (shape[1], shape[0])
    avg_img = np.zeros(shape, np.float32)
    progressbar = ProgressBar('Averaging ', max=avg_num)
    num = 0
    for reg in pos_regions:
        if num >= avg_num:
            break
        num += 1
        progressbar.next()
        resized = reg.load_cropped_resized_sample(window_dims)
        resized = auto_canny(resized)
        resized = np.float32(resized)
        avg_img = cv2.add(avg_img, resized / float(avg_num))
    progressbar.finish()
    return avg_img
def main(argv):
    args = argparser.parse_args()
    print >> sys.stderr, '# Start: Keyword Data: %s, %s, %s, %s' % (
        args.cc, args.week, args.pages, datetime.datetime.now().time().isoformat())
    ga, gsc = initialize_service(argv, "analytics"), initialize_service(argv, "webmasters")
    print '"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"' % (
        "cc", "website", "url", "date", "keyword", "impressions", "clicks",
        "ctr", "position", "sessions (week)")
    bar = Bar('Processing', max=args.pages, suffix='%(percent).1f%% - %(eta)ds')
    for website in GA_IDS[args.cc]:
        urls = get_top_landing_pages(ga, args.cc, website, args.week, args.pages)
        for row in urls:
            data = []
            # We switched from http to https between week 3 and 4.
            if (args.week <= 4 and args.cc != 'VN') or website != "IPRICE":
                data.extend(get_keyword_data(gsc, args.cc, website, args.week, row[0][1:], "http"))
            if (args.week >= 3 or args.cc == 'VN') and website == "IPRICE":
                data.extend(get_keyword_data(gsc, args.cc, website, args.week, row[0][1:], "https"))
            output(args.cc, website, row[0], row[1], data)
            bar.next()
    bar.finish()
    print >> sys.stderr, '# End: Keyword Data: %s, %s, %s, %s' % (
        args.cc, args.week, args.pages, datetime.datetime.now().time().isoformat())
def set_image_objects(self):
    landsat8 = ("(acquisitionDate >= date'2013-01-01' AND acquisitionDate <= date'2016-12-31')"
                " AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'OLI') AND (cloudCover <= 20)")
    landsat7 = ("(acquisitionDate >= date'2003-01-01' AND acquisitionDate <= date'2016-12-31')"
                " AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'ETM_SLC_OFF') AND (cloudCover <= 20)")
    landsat4_5 = ("(acquisitionDate >= date'1982-01-01' AND acquisitionDate <= date'2011-12-31')"
                  " AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'TM') AND (cloudCover <= 20)")
    landsat1_5 = ("(acquisitionDate >= date'1972-01-01' AND acquisitionDate <= date'2013-12-31')"
                  " AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'MSS') AND (cloudCover <= 20)")
    queries_name = ["landsat8", "landsat7", "landsat4_5", "landsat1_5"]
    queries = [landsat8, landsat7, landsat4_5, landsat1_5]
    obj = []
    count = 0
    for q in queries:
        parms = {
            "f": "json",
            "where": q,
            "geometry": self.bounding_box["geometry"],
            "returnGeometry": "false",
            "spatialRel": "esriSpatialRelIntersects",
            "geometryType": "esriGeometryEnvelope",
            "inSR": self.bounding_box["geometry"]["spatialReference"]["wkid"],
            "outSR": self.bounding_box["geometry"]["spatialReference"]["wkid"],
            "outFields": "*",
            "orderByFields": "dayOfYear"
        }
        query = self._query(parms)
        # The bar tracks the features returned by this query, not the number of queries.
        bar = Bar("Requesting data: " + queries_name[count], max=len(query["features"]))
        for i in query["features"]:
            obj.append(i)
            bar.next()
        bar.finish()
        count = count + 1
    return obj
def main(argv):
    args = argparser.parse_args()
    print >> sys.stderr, '# Start: Matching: %s' % (datetime.datetime.now().time().isoformat())
    masterbrain = read(args.masterbrain)
    keywords = read(args.keywords)
    bar = Bar('Processing', max=len(masterbrain), suffix='%(percent).1f%% - %(eta)ds')
    regex = {}
    for keyword in keywords:
        regex[keyword] = re.compile(r'\b({0})\b'.format(keyword))
    matches = 0
    for string in masterbrain:
        for keyword in keywords:
            if regex[keyword].search(string):
                matches = matches + 1
                print 1, "\t", string, "\t", keyword
                break
        else:
            print 0, "\t", string
        bar.next()
    bar.finish()
    print matches, "/", len(masterbrain)
    print >> sys.stderr, '# End: Matching: %s' % (datetime.datetime.now().time().isoformat())
def keyadd(name):
    bar = Bar('Processing', max=5)
    try:
        bar.next()
        nova('keypair-add', '--pub-key', '~/.ssh/id_rsa.pub', '%s' % name)
    except:
        # Key add error on this name: try to delete the stale key, then add it again.
        bar.next()
        try:
            bar.next()
            result = nova('keypair-delete', '%s' % name)
            bar.next()
            results = nova('keypair-add', '--pub-key', '~/.ssh/id_rsa.pub', '%s' % name)
        except:
            print '''
            Key deletion error on %s
            ''' % name
            bar.next()
    bar.finish()
    result = nova('keypair-list')
    print result
def hough(im, ntx=460, mry=360):
    pim = im.load()
    nimx, mimy = im.size
    mry = int(mry/2)*2
    him = Image.new("L", (ntx, mry), 255)
    phim = him.load()
    rmax = hypot(nimx, mimy)
    dr = rmax / (mry/2)
    dth = pi / ntx
    bar = Bar('Processing', max=nimx)
    for jx in xrange(nimx):
        for iy in xrange(mimy):
            col = pim[jx, iy]
            if col == 255:
                continue
            for jtx in xrange(ntx):
                th = dth * jtx
                r = jx*cos(th) + iy*sin(th)
                iry = mry/2 + int(r/dr + 0.5)
                try:
                    phim[jtx, iry] -= 1
                except:
                    print 'error'
        bar.next()
    del bar
    return him
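# Usage sketch (the filename is hypothetical): the input is a grayscale PIL
# image whose edge pixels are dark on a white background; the return value is
# the (theta, rho) Hough accumulator as another grayscale image.
from PIL import Image

edges = Image.open("edges.png").convert("L")
accumulator = hough(edges)
accumulator.save("hough_accumulator.png")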
def main(argv):
    args = argparser.parse_args()
    print >> sys.stderr, '# Start: Adwords Data: %s, %s' % (
        args.cc, datetime.datetime.now().time().isoformat())
    service = initialize_service()
    keywords = read_file(args.file)
    print '"%s"\t"%s"\t"%s"\t"%s"' % ("keyword", "sv (month)", "competition", "cpc ($)")
    bar = Bar('Processing', max=len(keywords), suffix='%(percent).1f%% - %(eta)ds')
    if args.stats:
        # Pagination of 800 items.
        kws = keywords
        while len(kws) > 0:
            page = kws[0:PAGE_SIZE]
            kws = kws[PAGE_SIZE:]
            output(query_adwords(service, args.cc, page, "STATS"))
            bar.next(len(page))
    elif args.ideas:
        # Pagination of 1 item.
        for kw in keywords:
            output(get_keyword_suggestions(service, args.cc, "IDEAS"))
            bar.next()
    bar.finish()
    print >> sys.stderr, '# End: Adwords Data: %s, %s' % (
        args.cc, datetime.datetime.now().time().isoformat())
def parse(self, dataset):
    """
    :type dataset: nala.structures.data.Dataset
    """
    outer_bar = Bar('Processing [SpaCy]', max=len(list(dataset.parts())))
    for part in dataset.parts():
        sentences = part.get_sentence_string_array()
        for index, sentence in enumerate(sentences):
            doc = self.nlp(sentence)
            for token in doc:
                tok = part.sentences[index][token.i]
                tok.features = {
                    'id': token.i,
                    'pos': token.tag_,
                    'dep': token.dep_,
                    'lemma': token.lemma_,
                    'prob': token.prob,
                    'is_punct': token.is_punct,
                    'is_stop': token.is_stop,
                    'cluster': token.cluster,
                    'dependency_from': None,
                    'dependency_to': [],
                    'is_root': False,
                }
                part.tokens.append(tok)
            for tok in doc:
                self._dependency_path(tok, index, part)
        part.percolate_tokens_to_entities()
        part.calculate_token_scores()
        part.set_head_tokens()
        outer_bar.next()
    outer_bar.finish()
    if self.constituency_parser:
        self.parser.parse(dataset)
def getUsers(hubname):
    log = open(HubAnalyzer.logfile, "a")
    print("hub: " + hubname + " ----------------- ", file=log)
    print(time.strftime("%H:%M:%S"), file=log)
    # clean the file to write users to
    url = HubAnalyzer.hubname2link(hubname)
    output_filename = "data/hubs/" + hubname
    # if data is here, do nothing
    if os.path.isfile(output_filename) and not HubAnalyzer.enforce_download_in_presence_of_data:
        print("data is already here, abort this url", file=log)
        return None
    output_file = open(output_filename, "w")
    try:
        last_page_num = int(HubAnalyzer.getLastPageNumber(url))
    except Exception as err:
        print("URL is broken, abort the url", file=log)
        log.flush()
        os.remove(output_filename)
        raise Exception("Cannot analyze the page, please, check the url below: \n" + url)
    # get connection to habrahabr-hub
    suffix = "/subscribers/rating/page"
    userlist_url = url + suffix
    http = urllib3.PoolManager()
    if HubAnalyzer.report_downloading_progress:
        HubAnalyzer.get_hub_description(hubname)
        bar = Bar("Downloading: " + hubname, max=last_page_num, suffix="%(percent)d%%")
    for i in range(1, last_page_num + 1):
        user_page = userlist_url + str(i)
        print(user_page, file=log)
        log.flush()
        try:
            response = http.request("GET", user_page)
        except urllib3.exceptions.HTTPError as err:
            if getattr(err, 'code', None) == 404:
                print(user_page + " !! 404 !!", file=log)
                log.flush()
                output_file.close()
                os.remove(output_filename)
                raise Exception("Hub is not found, please, check the url")
            else:
                print(user_page + " PARSING ERROR ", file=log)
                log.flush()
                output_file.close()
                os.remove(output_filename)
                raise Exception("Error: cannot parse the page!")
        html = response.data
        soup = BeautifulSoup(html, "html.parser")
        usersRow = soup.find_all(class_="user ")
        for userRow in usersRow:
            username = userRow.find(class_="username").text
            print(username, file=output_file)
            output_file.flush()
        if HubAnalyzer.report_downloading_progress:
            bar.next()
    # finalize and close everything
    if HubAnalyzer.report_downloading_progress:
        bar.finish()
    output_file.close()
    log.close()
def editorial_publish(guides, endpoint, function_class, user_agent,
                      nailgun_bin, content_generator):
    """
    Takes care of publishing the editorial content for the guides.
    """
    # Init the nailgun thing for editorial content generation.
    nailguninit(nailgun_bin, content_generator)
    searches = {}
    pbar = Bar('extracting editorial content for guides:', max=len(guides)+1)
    pbar.start()
    error = False
    for i, guide in enumerate(guides):
        jsonguide = None
        with open(guide, 'r') as g:
            jsonguide = json.load(g)
        if not jsonguide:
            logging.error('could not load json from {0}'.format(guide))
            error = True
            continue
        search = cityinfo.cityinfo(jsonguide)
        uri = cityres.cityres(search, endpoint)
        if not uri:
            logging.error('no dbpedia resource was found for {0}'.format(guide))
            error = True
            continue
        urls = urlinfer.urlinferdef([unquote(uri)])
        if len(urls) < 1:
            logging.error('no wikipedia/wikivoyage urls found/inferred'
                          ' for resource {0}'.format(uri))
            error = True
            continue
        content = editorial_content(urls, function_class, user_agent)
        if not content:
            logging.error('no editorial content could be'
                          ' generated for {0}'.format(guide))
            error = True
            continue
        # Insert the content into the guide.
        jsonsert.jsonsert(content, guide)
        logging.info('editorial content for {0} successfully'
                     ' inserted.'.format(guide))
        pbar.next()
    pbar.finish()
    return error
def evolve(self, population, cxpb, mutpb, mutfq, ngen, goal):
    # Cheapest classifier.
    clf = LinearRegression(normalize=True)
    # Evaluate fitnesses of starting population.
    fitness_list = map(lambda x: self.evaluate(x, clf), population)
    # Assign fitness values.
    for individual, fitness in zip(population, fitness_list):
        individual.fitness.values = fitness
    best = max(population, key=lambda x: x.fitness.values[0])
    # So that we know things are happening.
    bar = Bar('Evolving', max=ngen)
    # Evolution!
    for gen in xrange(ngen):
        if best.fitness.values[0] > goal:
            break
        # Select the next generation of individuals.
        offspring = []
        offspring.append(best)
        offspring += tools.selTournament(population, len(population)-1, 10)
        offspring = map(self.toolbox.clone, offspring)
        # Apply crossovers.
        for child_a, child_b in zip(offspring[::2], offspring[1::2]):  # Staggered.
            if random.random() < cxpb:
                self.crossover(child_a, child_b, cxpb)
                del child_a.fitness.values
                del child_b.fitness.values
        # Apply mutations.
        for child in offspring:
            if random.random() < mutpb:
                self.mutate(child, mutfq)
                del child.fitness.values
        # Reevaluate fitness of changed individuals only.
        new_children = [e for e in offspring if not e.fitness.valid]
        fitness_list = map(lambda x: self.evaluate(x, clf), new_children)
        for individual, fitness in zip(new_children, fitness_list):
            individual.fitness.values = fitness
        # Replace old population with new generation.
        best = max(population, key=lambda x: x.fitness.values[0])
        population = offspring
        # Progress!
        bar.next()
    # Done! Return the most fit evolved individual.
    bar.finish()
    return best
def do_epoch(mode, epoch, skipped=0):
    # mode is 'train' or 'test'
    y_true = []
    y_pred = []
    avg_loss = 0.0
    prev_time = time.time()

    batches_per_epoch = dmn.get_batches_per_epoch(mode)
    if mode == "test":
        batches_per_epoch = min(1000, batches_per_epoch)
    bar = Bar('processing', max=batches_per_epoch)
    for i in range(0, batches_per_epoch):
        step_data = dmn.step(i, mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        current_skip = (step_data["skipped"] if "skipped" in step_data else 0)
        log = step_data["log"]

        skipped += current_skip

        if current_skip == 0:
            avg_loss += current_loss
            for x in answers:
                y_true.append(x)
            for x in prediction.argmax(axis=1):
                y_pred.append(x)

            # TODO: save the state sometimes
            if (i % args.log_every == 0):
                cur_time = time.time()
                # print(" %sing: %d.%d / %d \t loss: %.3f \t avg_loss: %.3f \t skipped: %d \t %s \t time: %.2fs" %
                #       (mode, epoch, i * args.batch_size, batches_per_epoch * args.batch_size,
                #        current_loss, avg_loss / (i + 1), skipped, log, cur_time - prev_time))
                prev_time = cur_time

        if np.isnan(current_loss):
            print "==> current loss IS NaN. This should never happen :)"
            exit()
        bar.next()
    bar.finish()

    avg_loss /= batches_per_epoch
    print "\n %s loss = %.5f" % (mode, avg_loss)
    print "confusion matrix:"
    print metrics.confusion_matrix(y_true, y_pred)

    accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)])
    print "accuracy: %.2f percent" % (accuracy * 100.0 / batches_per_epoch / args.batch_size)

    if len(accuracies) > 0 and accuracies[-1] > accuracy:
        dmn.lr = dmn.lr * args.learning_rate_decay
    accuracies.append(accuracy)

    return avg_loss, skipped
def get_stale_files(self, media_files):
    django_models_with_file_fields = self.get_django_models_with_file_fields()
    stale_files = []
    bar = Bar('Analyzing media files', max=len(media_files))
    for media_file in media_files:
        if not self.remove_file_if_not_exists_in_db(media_file, django_models_with_file_fields):
            stale_files.append(media_file)
        bar.next()
    bar.finish()
    return stale_files
def save_regions(reg_gen, num_regions, window_dims, save_dir):
    progressbar = ProgressBar('Saving regions', max=num_regions)
    index = 0
    for img_region in itertools.islice(reg_gen, 0, num_regions):
        fname = os.path.join(save_dir, '{:06d}.png'.format(index))
        index += 1
        sample = img_region.load_cropped_resized_sample(window_dims)
        cv2.imwrite(fname, sample)
        progressbar.next()
    progressbar.finish()
def resample(self, rm, num_points):
    node = point.make(self.start.x, self.start.y)
    rm.insert(node)
    bar = Bar("Generating Roadmap", max=num_points)
    for i in xrange(num_points):
        sample = point.get_random_point(self.width, self.height)
        rm.insert(sample)
        bar.next()
    bar.finish()
    return rm
def update_api(contract_paths, old_api):
    bar = Bar("Contracts", max=len(contract_paths))
    new_api = {"events": {}, "functions": {}}
    for contract_name, contract_path in contract_paths.items():
        events_api, functions_api = update_contract_api(contract_name, contract_path, old_api)
        if bool(events_api):
            new_api["events"].update(events_api)
        new_api["functions"][contract_name] = functions_api
        bar.next()
    bar.finish()
    return new_api
def train(self, epoch):
    self.current_epoch = epoch

    if self.args.freeze and epoch > 10:
        self.model.freeze_weighting_of_rasc()
        self.optimizer_G = torch.optim.Adam(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            lr=self.args.lr, betas=(0.5, 0.999),
            weight_decay=self.args.weight_decay)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    LoggerLossG = AverageMeter()
    LoggerLossGGAN = AverageMeter()
    LoggerLossGL1 = AverageMeter()
    LoggerLossD = AverageMeter()
    LoggerLossDreal = AverageMeter()
    LoggerLossDfake = AverageMeter()
    lossMask8s = AverageMeter()
    lossMask4s = AverageMeter()
    lossMask2s = AverageMeter()

    # switch to train mode
    self.model.train()
    self.discriminator.train()

    end = time.time()
    bar = Bar('Processing {} '.format(self.args.arch), max=len(self.train_loader))
    for i, (inputs, target) in enumerate(self.train_loader):
        input_image, mask, m2s, m4s, m8s = inputs
        current_index = len(self.train_loader) * epoch + i

        # Discriminator targets, created directly on the GPU.
        valid = torch.ones((input_image.size(0), self.patch, self.patch),
                           requires_grad=False).cuda()
        fake = torch.zeros((input_image.size(0), self.patch, self.patch),
                           requires_grad=False).cuda()
        reverse_mask = 1 - mask

        if self.args.gpu:
            input_image = input_image.cuda()
            mask = mask.cuda()
            m2s = m2s.cuda()
            m4s = m4s.cuda()
            m8s = m8s.cuda()
            reverse_mask = reverse_mask.cuda()
            target = target.cuda()

        # ---------------
        #  Train model
        # ---------------
        self.optimizer_G.zero_grad()

        fake_input, mask8s, mask4s, mask2s = self.model(
            torch.cat((input_image, mask), 1))
        pred_fake = self.discriminator(fake_input, input_image)
        loss_GAN = self.criterion_GAN(pred_fake, valid)
        loss_pixel = self.criterion_L1(fake_input, target)  # fake in

        # here two choices: mseLoss or NLLLoss
        masked_loss8s = self.attentionLoss8s(mask8s, m8s)
        masked_loss4s = self.attentionLoss4s(mask4s, m4s)
        masked_loss2s = self.attentionLoss2s(mask2s, m2s)

        loss_G = (loss_GAN + 100 * loss_pixel + 90 * masked_loss8s
                  + 90 * masked_loss4s + 90 * masked_loss2s)
        loss_G.backward()
        self.optimizer_G.step()

        self.optimizer_D.zero_grad()
        pred_real = self.discriminator(target, input_image)
        loss_real = self.criterion_GAN(pred_real, valid)
        pred_fake = self.discriminator(fake_input.detach(), input_image)
        loss_fake = self.criterion_GAN(pred_fake, fake)
        loss_D = 0.5 * (loss_real + loss_fake)
        loss_D.backward()
        self.optimizer_D.step()

        # ---------------------
        #  Logger
        # ---------------------
        LoggerLossGGAN.update(loss_GAN.item(), input_image.size(0))
        LoggerLossGL1.update(loss_pixel.item(), input_image.size(0))
        LoggerLossG.update(loss_G.item(), input_image.size(0))
        LoggerLossDreal.update(loss_real.item(), input_image.size(0))
        LoggerLossDfake.update(loss_fake.item(), input_image.size(0))
        LoggerLossD.update(loss_D.item(), input_image.size(0))
        lossMask8s.update(masked_loss8s.item(), input_image.size(0))
        lossMask4s.update(masked_loss4s.item(), input_image.size(0))
        lossMask2s.update(masked_loss2s.item(), input_image.size(0))

        # ---------------------
        #  Visualize
        # ---------------------
        if i == 1:
            self.writer.add_images('train/Goutput', deNorm(fake_input), current_index)
            self.writer.add_images('train/target', deNorm(target), current_index)
            self.writer.add_images('train/input', deNorm(input_image), current_index)
            self.writer.add_images('train/mask', mask.repeat((1, 3, 1, 1)), current_index)
            self.writer.add_images('train/attention2s', mask2s.repeat(1, 3, 1, 1), current_index)
            self.writer.add_images('train/attention4s', mask4s.repeat(1, 3, 1, 1), current_index)
            self.writer.add_images('train/attention8s', mask8s.repeat(1, 3, 1, 1), current_index)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = ('({batch}/{size}) Data: {data:.2f}s | Batch: {bt:.3f}s | '
                      'Total: {total:} | ETA: {eta:} | Loss D: {loss_d:.4f} | '
                      'Loss G: {loss_g:.4f} | Loss L1: {loss_l1:.6f} ').format(
            batch=i + 1,
            size=len(self.train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss_d=LoggerLossD.avg,
            loss_g=LoggerLossGGAN.avg,
            loss_l1=LoggerLossGL1.avg)
        bar.next()
    bar.finish()

    self.writer.add_scalar('train/loss/GAN', LoggerLossGGAN.avg, epoch)
    self.writer.add_scalar('train/loss/D', LoggerLossD.avg, epoch)
    self.writer.add_scalar('train/loss/L1', LoggerLossGL1.avg, epoch)
    self.writer.add_scalar('train/loss/G', LoggerLossG.avg, epoch)
    self.writer.add_scalar('train/loss/Dreal', LoggerLossDreal.avg, epoch)
    self.writer.add_scalar('train/loss/Dfake', LoggerLossDfake.avg, epoch)
    self.writer.add_scalar('train/loss_Mask8s', lossMask8s.avg, epoch)
    self.writer.add_scalar('train/loss_Mask4s', lossMask4s.avg, epoch)
    self.writer.add_scalar('train/loss_Mask2s', lossMask2s.avg, epoch)
def verify_stats_cb():
    """ Verify stats_cb """

    def stats_cb(stats_json_str):
        global good_stats_cb_result
        stats_json = json.loads(stats_json_str)
        if topic in stats_json['topics']:
            app_offset = stats_json['topics'][topic]['partitions']['0']['app_offset']
            if app_offset > 0:
                print("# app_offset stats for topic %s partition 0: %d" %
                      (topic, app_offset))
                good_stats_cb_result = True

    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': uuid.uuid1(),
            'session.timeout.ms': 6000,
            'error_cb': error_cb,
            'stats_cb': stats_cb,
            'statistics.interval.ms': 200,
            'auto.offset.reset': 'earliest'}

    c = confluent_kafka.Consumer(conf)
    c.subscribe([topic])

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming', max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while not good_stats_cb_result:
        # Consume until EOF or error
        msg = c.poll(timeout=20.0)
        if msg is None:
            raise Exception('Stalled at %d/%d message, no new messages for 20s' %
                            (msgcnt, max_msgcnt))

        if msg.error():
            raise confluent_kafka.KafkaException(msg.error())

        bytecnt += len(msg)
        msgcnt += 1

        if bar is not None and (msgcnt % 10000) == 0:
            bar.next(n=10000)

        if msgcnt == 1:
            t_first_msg = time.time()
        if msgcnt >= max_msgcnt:
            break

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    c.close()
def _train_one_epoch(self):
    bar = Bar('Processing', max=len(self.train_data))
    for step, (data, label) in enumerate(self.train_data):
        self.sigma = hm_kernel_size(self.hm_type, self.last_epoch, threshold=4)
        target = gene_heatmap(label, self.sigma)
        inputs = Variable(data)
        target = Variable(t.from_numpy(target))
        if len(self.params.gpus) > 0:
            inputs = inputs.cuda()
            target = target.type(t.FloatTensor).cuda()

        # forward
        score = self.model(inputs)
        loss = 0
        # stacked hourglass: sum the loss over all stacks
        for s in range(len(score)):
            loss += self.criterion(score[s], target)
        loss = loss / len(score)
        # simple pose res
        # loss = self.criterion(score[1], target)

        # backward
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # meters update
        self.loss_meter.add(loss.item())

        # evaluation: calculate PCKh
        predictions = spatial_soft_argmax2d(score[len(score) - 1], 1000,
                                            False).cpu().numpy().reshape(-1, 2)
        targetcoors = label.numpy().reshape(-1, 2)
        steppckh, steperr = evalPCKh(predictions, targetcoors, threshold=50, alpha=0.2)

        # tensorboard show
        if step % 500 == 0:
            target_shows = t.sum(target[0], 0)
            target_shows[target_shows > 1] = 1
            self.writer.add_image('train/input', inputs[0], self.last_epoch)
            self.writer.add_image('train/target', target_shows, self.last_epoch,
                                  dataformats='HW')
            self.writer.add_image('train/output', t.sum(score[1][0], 0),
                                  self.last_epoch, dataformats='HW')

        bar.suffix = ('Train: [%(index)d/%(max)d] | Epoch: [{0}/{1}] | '
                      'Loss: {loss:6f} | PCKh: {pckh:4f} | AveErr: {err:.2f} pixel |').format(
            self.last_epoch, self.params.max_epoch,
            loss=loss, pckh=steppckh, err=steperr)
        bar.next()
    bar.finish()
def prefetch_test(opt):
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    Dataset = dataset_factory[opt.test_dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    Logger(opt)

    split = 'val' if not opt.trainval else 'test'
    dataset = Dataset(opt, split)
    detector = Detector(opt)

    if opt.load_results != '':
        load_results = json.load(open(opt.load_results, 'r'))
        for img_id in load_results:
            for k in range(len(load_results[img_id])):
                if load_results[img_id][k]['class'] - 1 in opt.ignore_loaded_cats:
                    load_results[img_id][k]['score'] = -1
    else:
        load_results = {}

    data_loader = torch.utils.data.DataLoader(
        PrefetchDataset(opt, dataset, detector.pre_process),
        batch_size=1, shuffle=False, num_workers=1, pin_memory=True)

    results = {}
    num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
    bar = Bar('{}'.format(opt.exp_id), max=num_iters)
    time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'track']
    avg_time_stats = {t: AverageMeter() for t in time_stats}
    if opt.use_loaded_results:
        for img_id in data_loader.dataset.images:
            results[img_id] = load_results['{}'.format(img_id)]
        num_iters = 0
    for ind, (img_id, pre_processed_images) in enumerate(data_loader):
        if ind >= num_iters:
            break
        if opt.tracking and ('is_first_frame' in pre_processed_images):
            if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results:
                pre_processed_images['meta']['pre_dets'] = \
                    load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))]
            else:
                print()
                print('No pre_dets for',
                      int(img_id.numpy().astype(np.int32)[0]),
                      '. Use empty initialization.')
                pre_processed_images['meta']['pre_dets'] = []
            detector.reset_tracking()
            print('Start tracking video', int(pre_processed_images['video_id']))
        if opt.public_det:
            if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results:
                pre_processed_images['meta']['cur_dets'] = \
                    load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))]
            else:
                print('No cur_dets for', int(img_id.numpy().astype(np.int32)[0]))
                pre_processed_images['meta']['cur_dets'] = []

        ret = detector.run(pre_processed_images)
        results[int(img_id.numpy().astype(np.int32)[0])] = ret['results']

        Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
            ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
        # for t in avg_time_stats:
        #     avg_time_stats[t].update(ret[t])
        #     Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format(
        #         t, tm=avg_time_stats[t])
        if opt.print_iter > 0:
            if ind % opt.print_iter == 0:
                print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
        else:
            bar.next()
    bar.finish()
    if opt.save_results:
        print('saving results to',
              opt.save_dir + '/save_results_{}{}.json'.format(
                  opt.test_dataset, opt.dataset_version))
        json.dump(_to_list(copy.deepcopy(results)),
                  open(opt.save_dir + '/save_results_{}{}.json'.format(
                      opt.test_dataset, opt.dataset_version), 'w'))
    dataset.run_eval(results, opt.data_dir, opt.save_dir)
def fit(self, train_domain, num_epochs, patience, optimizer, train_dir, dev_dir):
    """
    Trains the model.
    :param train_domain: the domain used for training
    :param num_epochs: the max number of epochs the model should be trained
    :param patience: the patience to use for early stopping
    :param optimizer: the optimizer that should be used
    :param train_dir: the directory containing the training files
    :param dev_dir: the directory containing the development files
    """
    print("Reading training data from %s..." % train_dir, flush=True)
    print("Tasks: %s" % self.task_names)
    train_X, train_Y, _, _, word2id, char2id, task2t2i = get_data(
        [train_domain], self.task_names, data_dir=train_dir, train=True)

    # get the development data of the same domain
    dev_X, dev_Y, org_X, org_Y, _, _, _ = get_data(
        [train_domain], self.task_names, word2id, char2id, task2t2i,
        data_dir=dev_dir, train=False)
    print('Length of training data:', len(train_X), flush=True)
    print('Length of validation data:', len(dev_X), flush=True)

    # store mappings of words and tags to indices
    self.set_indices(word2id, char2id, task2t2i)
    num_words = len(self.word2id)
    num_chars = len(self.char2id)

    print('Building the computation graph...', flush=True)
    self.predictors, self.char_rnn, self.wembeds, self.cembeds = \
        self.build_computation_graph(num_words, num_chars)

    if optimizer == SGD:
        trainer = dynet.SimpleSGDTrainer(self.model)
    elif optimizer == ADAM:
        trainer = dynet.AdamTrainer(self.model)
    else:
        raise ValueError('%s is not a valid optimizer.' % optimizer)

    train_data = list(zip(train_X, train_Y))
    num_iterations = 0
    num_epochs_no_improvement = 0
    best_dev_acc = 0

    print('Training model with %s for %d epochs and patience of %d.'
          % (optimizer, num_epochs, patience))
    for epoch in range(num_epochs):
        print('', flush=True)
        bar = Bar('Training epoch %d/%d...' % (epoch+1, num_epochs),
                  max=len(train_data), flush=True)

        # keep track of the # of updates, total loss, and total # of
        # predicted instances per task
        task2num_updates = {task: 0 for task in self.task_names}
        task2total_loss = {task: 0.0 for task in self.task_names}
        task2total_predicted = {task: 0.0 for task in self.task_names}
        total_loss = 0.0
        total_penalty = 0.0
        total_predicted = 0.0
        random.shuffle(train_data)

        # for every instance, we optimize the loss of the corresponding task
        for (word_indices, char_indices), task2label_id_seq in train_data:
            # get the concatenated word and char-based features for every
            # word in the sequence
            features = self.get_word_char_features(word_indices, char_indices)
            for task, y in task2label_id_seq.items():
                # get the index of the placeholder label; if there is no
                # placeholder in the data, set it to -1 so that every sample
                # is regarded
                placeholder_idx = self.task2tag2idx[task]['_'] \
                    if '_' in self.task2tag2idx[task] else -1
                if task in [POS, CHUNK, NER, SRL]:
                    output, penalty = self.predict(features, task, train=True)
                    neg_logs = [pick_neg_log(pred, gold)
                                for pred, gold in zip(output, y)]
                elif task in [STUTT, SAARB, TSVET, VUAMC,
                              STUTT_M, SAARB_M, TSVET_M, VUAMC_M]:
                    output, penalty = self.predict(features, task, train=True)
                    neg_logs = [pick_neg_log(pred, gold)
                                for pred, gold in zip(output, y)
                                if gold != placeholder_idx]
                    # if the sentence does not contain any literal or metaphor
                    # samples, skip it; do not consider it for training
                    if not neg_logs:
                        continue
                else:
                    raise NotImplementedError('Task %s has not been '
                                              'implemented yet.' % task)

                loss = dynet.esum(neg_logs)
                lv = loss.value()
                # sum the loss and the subspace constraint penalty
                combined_loss = loss + dynet.parameter(
                    self.constraint_weight_param, update=False) * penalty
                total_loss += lv
                total_penalty += penalty.value()
                assert len(output) == len(y)
                total_predicted += len([1 for gold in y
                                        if gold != placeholder_idx])
                task2total_loss[task] += lv
                task2total_predicted[task] += len(
                    [1 for gold in y if gold != placeholder_idx])
                task2num_updates[task] += 1

                # back-propagate through the combined loss
                combined_loss.backward()
                trainer.update()
            bar.next()
            num_iterations += 1

        print("\nEpoch %d. Total loss: %.3f. Total penalty: %.3f. Losses: "
              % (epoch, total_loss / total_predicted,
                 total_penalty / total_predicted), end='', flush=True)
        for task in task2total_loss.keys():
            if task2total_predicted[task] == 0:
                print('%s: %.3f/%.3f. ' % (task, task2total_loss[task],
                                           task2total_predicted[task]),
                      end='', flush=True)
            else:
                print('%s: %.3f. ' % (task, task2total_loss[task]
                                      / task2total_predicted[task]),
                      end='', flush=True)
        print('', flush=True)

        # evaluate after every epoch
        dev_acc = self.evaluate(dev_X, dev_Y, org_X=None, mode='nope')

        if dev_acc > best_dev_acc:
            print('Main task %s dev acc %.4f is greater than best dev acc '
                  '%.4f...' % (self.main_task, dev_acc, best_dev_acc),
                  flush=True)
            best_dev_acc = dev_acc
            num_epochs_no_improvement = 0
            print('Saving model to directory %s...' % self.model_dir, flush=True)
            self.save()
            self.evaluate(dev_X, dev_Y, org_X=org_X, mode='dev')
        else:
            print('Main task %s dev acc %.4f is lower than best dev acc '
                  '%.4f...' % (self.main_task, dev_acc, best_dev_acc),
                  flush=True)
            num_epochs_no_improvement += 1
        if num_epochs_no_improvement == patience:
            print('Early stopping...', flush=True)
            print('Loading the best performing model from %s...'
                  % self.model_dir, flush=True)
            self.model.load(self.model_file)
            break
def getSingleTraining(file):
    path = os.path.abspath(file)
    pos = path.rfind('/')
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    scene_name = tokens[2]
    scene_name = path[:pos] + '/' + scene_name + '_d.pcd'
    file_descriptor = path[:pos] + '/tmp' + descriptor_id + '.csv'
    labels = np.genfromtxt(file_descriptor, dtype='str', skip_header=1, delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    fileId = tokens[-1]
    tokens = fileId.split('.')
    fileId = tokens[0]
    # (An earlier version recovered the affordances with over 128 good
    # predictions directly from the *_goodPointsX.pcd / *_goodPoints.pcd
    # result files; the filtered counts are read from CSV instead.)
    new_c = np.genfromtxt('filtered_counts2.csv', delimiter=',', dtype='int')
    with open('file_lists2.csv', 'r') as f:
        reader = csv.reader(f)
        new_n = list(reader)
    samples = 32
    points = 4096
    ids_target = np.nonzero(new_c >= samples)[0]
    print('Actually using %d affordances' % (ids_target.size))
    fig = plt.figure()
    plt.ion()
    ax = fig.add_subplot(121, projection='3d')
    ax2 = fig.add_subplot(122, projection='3d')
    unique_scenes = dict()
    k = 10
    if k > 1:
        bar = Bar('Creating original single example training dataset',
                  max=ids_target.shape[0])
        for i in range(ids_target.shape[0]):
            interaction = ids_target[i]
            path_to_data = os.path.abspath('../data')
            name = (path_to_data + '/affordances/binaryOc_AffordancesDataset_train'
                    + str(interaction) + '_' + str(TRAIN_EXAMPLES) + '.h5')
            if os.path.exists(name):
                continue
            # find training data
            aff_dir = labels[interaction, 0]
            query_object = labels[interaction, 2]
            data_file = (path[:pos] + "/" + aff_dir + "/ibs_full_"
                         + labels[interaction, 1] + "_" + query_object + ".txt")
            with open(data_file) as f:
                content = f.readlines()
            # remove whitespace characters like `\n` at the end of each line
            content = [x.strip() for x in content]
            scene_file = content[0].split(":")[1]
            tmp = content[8].split(":")[1]
            datapoint = tmp.split(',')
            test_point = np.expand_dims(np.asarray([float(x) for x in datapoint]), axis=0)
            data_file = path[:pos] + "/" + aff_dir + "/" + scene_file
            if '.pcd' in scene_file or '.ply' in scene_file:
                if os.path.exists(data_file):
                    data_file = data_file
                else:
                    # maybe the extension is missing; try .ply, then .pcd
                    try_data_file = data_file + '.ply'
                    if os.path.exists(try_data_file):
                        data_file = try_data_file
                    else:
                        try_data_file = data_file + '.pcd'
                        if os.path.exists(try_data_file):
                            data_file = try_data_file
            if '.pcd' in data_file:
                cloud_training = load_pcd_data(data_file)
            else:
                cloud_training = load_ply_data(data_file)
            data = np.zeros((2, n_points, 3), dtype=np.float32)
            data_labels = np.zeros((2, 1), dtype=np.int32)
            boundingBoxDiag = np.linalg.norm(
                np.min(cloud_training, 0) - np.max(cloud_training, 0))
            # sample a voxel of radius max_rad around the test point
            kdt = BallTree(cloud_training, leaf_size=5, metric='euclidean')
            voxel_ids = getVoxel(test_point, max_rad, kdt)
            voxel = cloud_training[voxel_ids, :]
            sample = sample_cloud(voxel, n_points)
            sample_cloud_training = sample_cloud(cloud_training, n_points * 2)
            # generate a negative example with noise around test_point
            low = test_point[0, 0] - max_rad
            high = test_point[0, 0] + max_rad
            tmp1 = (high - low) * np.random.random_sample((n_points, 1)) + low
            low = test_point[0, 1] - max_rad
            high = test_point[0, 1] + max_rad
            tmp2 = (high - low) * np.random.random_sample((n_points, 1)) + low
            low = test_point[0, 2] - max_rad
            high = test_point[0, 2] + max_rad
            tmp3 = (high - low) * np.random.random_sample((n_points, 1)) + low
            negative_cloud_training = np.concatenate((tmp1, tmp2, tmp3), axis=1)
            data[0, ...] = sample - test_point
            data_labels[0, ...] = np.zeros((1, 1), dtype=np.int32)
            data[1, ...] = negative_cloud_training - test_point
            data_labels[1, ...] = np.ones((1, 1), dtype=np.int32)
            save_h5(name, data, data_labels, 'float32', 'uint8')
            ax.scatter(sample_cloud_training[:, 0], sample_cloud_training[:, 1],
                       sample_cloud_training[:, 2], s=1, c='b')
            ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2], s=3, c='b')
            ax2.scatter(negative_cloud_training[:, 0], negative_cloud_training[:, 1],
                        negative_cloud_training[:, 2], s=3, c='r')
            plt.pause(1)
            plt.draw()
            ax.clear()
            ax2.clear()
            bar.next()
        bar.finish()
    name = '../data/affordances/names.txt'
    with open(name, "w") as text_file:
        for i in range(ids_target.shape[0]):
            text_file.write("%d:%s-%s\n" % (i, labels[ids_target[i], 0],
                                            labels[ids_target[i], 2]))
def computeResultStats(descriptor_id):
    file_ids = getResults(descriptor_id)
    print('Found %d actual results' % (len(file_ids)))
    path = os.path.abspath(result_dirs[0])
    print(path)
    file_descriptor = path + '/tmp' + str(descriptor_id) + '.csv'
    labels = np.genfromtxt(file_descriptor, dtype='str', skip_header=1, delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    counts = np.zeros((labels.shape[0], 1), dtype=np.int32)
    countsFile = "Counts_" + str(descriptor_id) + ".csv"
    if 'some_counts' not in globals():
        # collect some data about the affordances found here
        counter = 0
        bar = Bar('Creating new data', max=len(file_ids))
        for file_id in file_ids:
            # read results
            some_results = file_ids[file_id] + file_id + "_goodPointsX.pcd"
            some_results_points = file_ids[file_id] + file_id + "_goodPoints.pcd"
            newDataName = file_ids[file_id] + file_id + "_newData.csv"
            try:
                data, _ = load_pcd_data_binary(some_results)
                points, real_c_data = load_pcd_data_binary(some_results_points)
            except Exception as e:
                print('Encoding error in %s' % (file_ids[file_id] + file_id))
                continue
            # unpack the packed RGB channels
            red = np.array((real_c_data >> 16) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
            green = np.array((real_c_data >> 8) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
            blue = np.array((real_c_data) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
            real_c_data = np.concatenate((red, green, blue), axis=1)
            perPoint = np.sum(real_c_data, axis=1)
            bounds = np.cumsum(perPoint)
            # only keep points above a minimum height
            minZ = np.min(points[:, 2])
            all_data = np.zeros((data.shape[0], 6))
            start_id = 0
            end_id = bounds[0]
            for i in range(bounds.shape[0]):
                if i > 0:
                    start_id = bounds[i - 1]
                else:
                    start_id = 0
                end_id = bounds[i]
                all_data[start_id:end_id, :3] = points[i, :]
                all_data[start_id:end_id, 3:] = data[start_id:end_id, :3]
            valid_ids = np.nonzero(all_data[:, Z] >= (minZ + 0.3))[0]
            data = all_data[valid_ids, :]
            np.savetxt(newDataName, data, delimiter=",", fmt='%1.6f')
            counter += 1
            counts_tmp = np.bincount(data[:, A_ID].astype(int), minlength=counts.shape[0])
            counts_tmp = np.expand_dims(counts_tmp, axis=1)
            counts += counts_tmp
            # advance the bar once per file (not twice, as before)
            bar.next()
        bar.finish()
    else:
        counts = some_counts
    with open(countsFile, "w") as text_file:
        for i in range(labels.shape[0]):
            text_file.write("%d,%s-%s,%d\n" % (i, labels[i, 0], labels[i, 2], counts[i]))
def validate(self):
    self.model.eval()
    start = time.time()
    summary_string = ''
    bar = Bar('Validation', fill='#', max=len(self.test_loader))
    if self.evaluation_accumulators is not None:
        for k, v in self.evaluation_accumulators.items():
            self.evaluation_accumulators[k] = []
    J_regressor = torch.from_numpy(
        np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()
    for i, target in enumerate(self.test_loader):
        move_dict_to_device(target, self.device)
        # <=============
        with torch.no_grad():
            inp = target['features']
            preds = self.model(inp, J_regressor=J_regressor)
            # convert to the 14-keypoint format for evaluation
            n_kp = preds[-1]['kp_3d'].shape[-2]
            pred_j3d = preds[-1]['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
            target_j3d = target['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
            pred_verts = preds[-1]['verts'].view(-1, 6890, 3).cpu().numpy()
            target_theta = target['theta'].view(-1, 85).cpu().numpy()
            # (an optional SMPL-renderer visualization of ground truth and
            # predicted poses was commented out here)
            self.evaluation_accumulators['pred_verts'].append(pred_verts)
            self.evaluation_accumulators['target_theta'].append(target_theta)
            self.evaluation_accumulators['pred_j3d'].append(pred_j3d)
            self.evaluation_accumulators['target_j3d'].append(target_j3d)
        del target, preds
        torch.cuda.empty_cache()
        # =============>
        batch_time = time.time() - start
        summary_string = (f'({i + 1}/{len(self.test_loader)}) | '
                          f'batch: {batch_time * 10.0:.4}ms | '
                          f'Total: {bar.elapsed_td} | ETA: {bar.eta_td:}')
        bar.suffix = summary_string
        bar.next()
    bar.finish()
    logger.info(summary_string)
        model.zero_grad()
        predictions = model(text_chunk)
        # for each bptt-size chunk we have the same batch_labels
        loss = criterion(predictions, batch_label)
        bptt_loss += loss.item()
        # do back-propagation for bptt steps in time
        loss.backward()
        optimizer.step()
        # After doing backprop, detach the RNN state to implement TBPTT
        # (truncated backpropagation through time): the state is kept, but the
        # chain of gradients through it is broken.
        model.repackage_rnn_state()
        bar.next()
    epoch_loss += bptt_loss
bar.finish()
# mean epoch loss
epoch_loss = epoch_loss / len(train_iter)
time_elapsed = datetime.now() - start_time

# progress
bar = Bar(f'Validation Epoch {e}/{epoch}', max=len(valid_iter))
# evaluation loop
model.eval()
with torch.no_grad():
    for batch_idx, batch in enumerate(valid_iter):
        batch_text = batch.text[0]  # batch.text is a tuple
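# repackage_rnn_state is not shown here; the standard TBPTT helper (a sketch,
# assuming the hidden state is a tensor or a tuple of tensors, as for LSTMs)
# detaches every tensor so gradients stop flowing into earlier chunks:
import torch

def repackage_hidden(h):
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)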
            if metric < best_metric:
                best_metric = metric
                best_image = image
                best_triangle = list(triangle)
        if not best_image == None:
            # print "Best image (hard): %d (%d)" % (best_image, best_connections)
            # print "  ", best_triangle
            best_image.tris.append(best_triangle)
            good_tris += 1
            done = True
    if not done:
        # print "failed triangle"
        failed_tris += 1
    count += 1
    if count % update_steps == 0:
        bar.next(update_steps)
bar.finish()

print "good tris =", good_tris
print "failed tris =", failed_tris

# write out an ac3d file
name = args.project + "/sba3d.ac"
f = open(name, "w")
f.write("AC3Db\n")
trans = 0.0
f.write("MATERIAL \"\" rgb 1 1 1 amb 0.6 0.6 0.6 emis 0 0 0 spec 0.5 0.5 0.5 shi 10 trans %.2f\n" % (trans))
f.write("OBJECT world\n")
f.write("kids " + str(len(proj.image_list)) + "\n")
def validate(loader, model, criterion, netType, debug, flip):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acces = AverageMeter()
    end = time.time()

    # predictions
    predictions = torch.Tensor(loader.dataset.__len__(), 68, 2)

    model.eval()
    gt_win, pred_win = None, None
    bar = Bar('Validating', max=len(loader))
    all_dists = torch.zeros((68, loader.dataset.__len__()))
    for i, (inputs, target, meta) in enumerate(loader):
        data_time.update(time.time() - end)

        input_var = torch.autograd.Variable(inputs.cuda())
        target_var = torch.autograd.Variable(target.cuda(async=True))

        output = model(input_var)
        score_map = output[-1].data.cpu()
        if flip:
            flip_input_var = torch.autograd.Variable(
                torch.from_numpy(shufflelr(inputs.clone().numpy())).float().cuda())
            flip_output_var = model(flip_input_var)
            flip_output = flip_back(flip_output_var[-1].data.cpu())
            score_map += flip_output

        # intermediate supervision
        loss = 0
        for o in output:
            loss += criterion(o, target_var)

        acc, batch_dists = accuracy(score_map, target.cpu(), idx, thr=0.07)
        all_dists[:, i * args.val_batch:(i + 1) * args.val_batch] = batch_dists

        # preds = final_preds(score_map, meta['center'], meta['scale'], meta['reference_scale'], [64, 64])
        pts, pts_img = get_preds_fromhm(score_map, meta['center'],
                                        meta['scale'], meta['reference_scale'])
        preds = pts_img

        for n in range(score_map.size(0)):
            predictions[meta['index'][n], :, :] = preds[n, :, :]

        if debug:
            gt_batch_img = batch_with_heatmap(inputs, target)
            pred_batch_img = batch_with_heatmap(inputs, score_map)
            if not gt_win or not pred_win:
                plt.subplot(121)
                gt_win = plt.imshow(gt_batch_img)
                plt.subplot(122)
                pred_win = plt.imshow(pred_batch_img)
            else:
                gt_win.set_data(gt_batch_img)
                pred_win.set_data(pred_batch_img)
            plt.pause(.05)
            plt.draw()

        losses.update(loss.data[0], inputs.size(0))
        acces.update(acc[0], inputs.size(0))

        batch_time.update(time.time() - end)
        end = time.time()
        bar.suffix = ('({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s | '
                      'Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | '
                      'Acc: {acc: .4f}').format(
            batch=i + 1,
            size=len(loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=acces.avg)
        bar.next()
    bar.finish()
    mean_error = torch.mean(all_dists)
    auc = calc_metrics(all_dists)  # this is auc of predicted maps and target
    print("=> Mean Error: {:.2f}, AUC@0.07: {} based on maps".format(
        mean_error * 100., auc))
    sys.stdout.flush()
    return losses.avg, acces.avg, predictions, auc
def train(loader, model, criterion, optimizer, netType, debug=False, flip=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acces = AverageMeter()

    model.train()
    end = time.time()

    gt_win, pred_win = None, None
    bar = Bar('Training', max=len(loader))
    for i, (inputs, target) in enumerate(loader):
        data_time.update(time.time() - end)

        input_var = torch.autograd.Variable(inputs.cuda())
        # 'async=True' is a syntax error on Python 3.7+; use non_blocking=True
        target_var = torch.autograd.Variable(target.cuda(non_blocking=True))

        if debug:
            gt_batch_img = batch_with_heatmap(inputs, target)
            # pred_batch_img = batch_with_heatmap(inputs, score_map)
            if not gt_win or not pred_win:
                plt.subplot(121)
                gt_win = plt.imshow(gt_batch_img)
                # plt.subplot(122)
                # pred_win = plt.imshow(pred_batch_img)
            else:
                gt_win.set_data(gt_batch_img)
                # pred_win.set_data(pred_batch_img)
            plt.pause(.05)
            plt.draw()

        output = model(input_var)
        score_map = output[-1].data.cpu()

        if flip:
            flip_input_var = torch.autograd.Variable(
                torch.from_numpy(shufflelr(
                    inputs.clone().numpy())).float().cuda())
            flip_output_var = model(flip_input_var)
            flip_output = flip_back(flip_output_var[-1].data.cpu())
            score_map += flip_output

        # intermediate supervision
        loss = 0
        for o in output:
            loss += criterion(o, target_var)
        acc, _ = accuracy(score_map, target.cpu(), idx, thr=0.07)

        # loss.data[0] was removed in PyTorch 0.5+; use loss.item()
        losses.update(loss.item(), inputs.size(0))
        acces.update(acc[0], inputs.size(0))

        optimizer.zero_grad()
        # loss.backward(retain_graph=True)
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()
        bar.suffix = ('({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s '
                      '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} '
                      '| Acc: {acc: .4f}').format(
            batch=i + 1, size=len(loader), data=data_time.val,
            bt=batch_time.val, total=bar.elapsed_td, eta=bar.eta_td,
            loss=losses.avg, acc=acces.avg)
        bar.next()

    bar.finish()
    return losses.avg, acces.avg
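# AverageMeter is used throughout these training/validation loops but never
# defined in this file. A minimal sketch of the usual definition (as
# popularized by the PyTorch ImageNet example); the original project's
# version may differ in details.
class AverageMeter(object):
    """Tracks the current value, running sum, count and average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val        # most recent value
        self.sum += val * n   # weighted by batch size n
        self.count += n
        self.avg = self.sum / self.count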
def main():
    # Get command line input:
    try:
        conllinputfile = sys.argv[1]
        text_id = sys.argv[2]
        dbname = sys.argv[3]
        sl_dbtablename = sys.argv[4]
        tl_dbtablename = sys.argv[5]
    except IndexError:
        print('''Usage: {} <path to target language conll formatted text>
              <text id of the inserted source language text>
              <database name>
              <source language database table name>
              <target language database table name> '''.format(sys.argv[0]))
        sys.exit(0)
    # Connect to the database
    con = psycopg(dbname, 'juho')
    # Read the conll data
    with open(conllinputfile, 'r') as f:
        conllinput = f.read()
    # Fetch the id of the pair that is already inserted
    text_id = con.FetchQuery(
        "SELECT id FROM {} WHERE id = %s".format('text_ids'), (text_id, ))
    try:
        text_id = text_id[0][0]
    except IndexError:
        raise MissingTextError('No such id in the text_ids table')
    # Get all the align ids that were inserted with the first file
    align_ids = con.FetchQuery(
        "SELECT DISTINCT align_id FROM {} WHERE text_id = %s ORDER BY align_id"
        .format(sl_dbtablename), (text_id, ))
    # Split the translation file into aligned segments according to the !!!! notation
    splitpattern = re.compile(
        r"\d+\t![^\n]+\n\n?\d+\t![^\n]+\n\n?\d+\t![^\n]+\n\n?\d+\t![^\n]+\n\n")
    alignsegments = re.split(splitpattern, conllinput)
    # Filter out empty align segments
    alignsegments = TrimList(alignsegments)
    # Check that the number of segments matches the source text
    if len(alignsegments) != len(align_ids):
        raise AlignMismatch(
            'The number of segments differs from the number in the source text: {}/{}'
            .format(len(alignsegments), len(align_ids)))
    # Get the current maximum indices:
    sentence_id = GetLastValue(
        con.FetchQuery(
            "SELECT max(sentence_id) FROM {}".format(tl_dbtablename)))
    # Insert a new entry in the translation_ids table
    translator = input('Give the author for this translation:\n')
    con.query(
        "INSERT INTO translation_ids (translator, sourcetext_id) VALUES(%s, %s)",
        (translator, text_id, ), commit=True)
    translation_id = GetLastValue(
        con.FetchQuery(
            "SELECT max(id) FROM translation_ids WHERE sourcetext_id = %(sid)s",
            {'sid': text_id}))
    # Initialize variables for db insertion
    rowlist = list()
    bar = Bar('Preparing the data for insertion into the database',
              max=len(alignsegments))
    # ================================================================================
    for idx, align_id in enumerate(align_ids):
        align_id = align_id[0]
        segment = alignsegments[idx]
        # Split each segment into lines
        # (line = word with all the morphological and syntactic information)
        words = segment.splitlines()
        sentence_id += 1
        for word in words:
            # Read all the information about the word
            if word == '':
                # Empty lines are sentence breaks
                sentence_id += 1
            else:
                columns = word.split('\t')
                if len(columns) < 7:
                    # An empty segment was encountered
                    print('Note: an empty segment encountered at align_id {}'
                          .format(align_id))
                    rowlist.append({
                        'align_id': align_id, 'sentence_id': sentence_id,
                        'text_id': text_id, 'translation_id': translation_id,
                        'tokenid': 1, 'token': 'EMPTYSEGMENT',
                        'lemma': 'EMPTYSEGMENT', 'pos': 'EMPTYSEGMENT',
                        'feat': 'EMPTYSEGMENT', 'head': 0, 'deprel': 'EMPTY'
                    })
                else:
                    # A word with information: initialize a new row
                    if sl_dbtablename == 'fi_conll':
                        rowlist.append({
                            'align_id': align_id, 'sentence_id': sentence_id,
                            'text_id': text_id, 'translation_id': translation_id,
                            'tokenid': columns[0], 'token': columns[1],
                            'lemma': columns[2], 'pos': columns[4],
                            'feat': columns[5], 'head': columns[6],
                            'deprel': columns[7]
                        })
                    elif sl_dbtablename == 'ru_conll':
                        rowlist.append({
                            'align_id': align_id, 'sentence_id': sentence_id,
                            'text_id': text_id, 'translation_id': translation_id,
                            'tokenid': columns[0], 'token': columns[1],
                            'lemma': columns[2], 'pos': columns[4],
                            'feat': columns[6], 'head': columns[8],
                            'deprel': columns[10]
                        })
        bar.next()
    # ================================================================================
    bar.finish()
    print('\nInserting to database, this might take a while...')
    con.BatchInsert(tl_dbtablename, rowlist)
    print('Done. Inserted {} rows.'.format(con.cur.rowcount))
def validate(self, epoch): self.current_epoch = epoch batch_time = AverageMeter() data_time = AverageMeter() psnres = AverageMeter() ssimes = AverageMeter() lossMask8s = AverageMeter() lossMask4s = AverageMeter() lossMask2s = AverageMeter() # switch to evaluate mode self.model.eval() end = time.time() bar = Bar('Processing {} '.format(self.args.arch), max=len(self.val_loader)) with torch.no_grad(): for i, (inputs, target) in enumerate(self.val_loader): input_image, mask, m2s, m4s, m8s = inputs current_index = len(self.train_loader) * epoch + i valid = torch.ones( (input_image.size(0), self.patch, self.patch), requires_grad=False).cuda() fake = torch.zeros( (input_image.size(0), self.patch, self.patch), requires_grad=False).cuda() reverse_mask = 1 - mask if self.args.gpu: input_image = input_image.cuda() mask = mask.cuda() m2s = m2s.cuda() m4s = m4s.cuda() m8s = m8s.cuda() reverse_mask = reverse_mask.cuda() target = target.cuda() valid.cuda() fake.cuda() # 32,64,128 output, mask8s, mask4s, mask2s = self.model( torch.cat((input_image, mask), 1)) output = deNorm(output) target = deNorm(target) masked_loss8s = self.attentionLoss8s(mask8s, m8s) masked_loss4s = self.attentionLoss4s(mask4s, m4s) masked_loss2s = self.attentionLoss2s(mask2s, m2s) ## psnr and ssim calculator. mse = self.criterion_GAN(output, target) psnr = 10 * log10(1 / mse.item()) ssim = pytorch_ssim.ssim(output, target) psnres.update(psnr, input_image.size(0)) ssimes.update(ssim, input_image.size(0)) lossMask8s.update(masked_loss8s.item(), input_image.size(0)) lossMask4s.update(masked_loss4s.item(), input_image.size(0)) lossMask2s.update(masked_loss2s.item(), input_image.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() # plot progress bar.suffix = '({batch}/{size}) Data: {data:.2f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | SSIM: {ssim:.4f} | PSNR: {psnr:.4f}'.format( batch=i + 1, size=len(self.val_loader), data=data_time.val, bt=batch_time.val, total=bar.elapsed_td, eta=bar.eta_td, ssim=ssimes.avg, psnr=psnres.avg) bar.next() bar.finish() self.writer.add_scalar('val/SSIM', ssimes.avg, epoch) self.writer.add_scalar('val/PSNR', psnres.avg, epoch) self.writer.add_scalar('train/loss_Mask8s', lossMask8s.avg, epoch) self.writer.add_scalar('train/loss_Mask4s', lossMask4s.avg, epoch) self.writer.add_scalar('train/loss_Mask2s', lossMask2s.avg, epoch) self.metric = psnres.avg
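# PSNR above is computed as 10 * log10(1 / MSE), which assumes the images are
# scaled to [0, 1] after deNorm (so the peak signal value is 1). A
# self-contained check of that formula on synthetic data:
import torch
from math import log10

a = torch.rand(1, 3, 64, 64)                   # "target" image in [0, 1]
b = (a + 0.01 * torch.randn_like(a)).clamp(0, 1)  # noisy "output"
mse = torch.mean((a - b) ** 2).item()
psnr = 10 * log10(1 / mse)                     # roughly 40 dB for noise std 0.01
print(f"MSE={mse:.6f}  PSNR={psnr:.2f} dB")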
from progress.bar import Bar categories = ["Ball", "Vase", "Corona", "Red", "Crown", "Grey_white"] colors = ["lime", "white", "orange", "red", "cyan", "pink"] files = set(glob("data/test/*.jpg")) #with open("data/test.txt") as f: # files = [l.rstrip("\n") for l in f] print(files) #files = [l.replace("data/data", "pre_data") for l in files] progress = Bar('Processing', max=len(files)) for f in files: progress.next() source = Image.open(f) draw = ImageDraw.Draw(source) img_width, img_height = source.size data = open(f.replace(".jpg", ".txt")) for d in data: params = d.split(" ") params = [float(p) for p in params] params[0] = int(params[0]) color = colors[params[0]] x_center = int(params[1] * img_width) y_center = int(params[2] * img_height)
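# The label rows above follow the YOLO text format: class_id, x_center,
# y_center, width, height, all normalized to [0, 1]. The snippet stops after
# the centre coordinates; a sketch of the box drawing it presumably continues
# with (the helper name and line width are illustrative, not from the original):
def draw_yolo_box(draw, params, img_width, img_height, color):
    # params = [class_id, cx, cy, w, h] with cx, cy, w, h in [0, 1];
    # draw is a PIL.ImageDraw.Draw bound to the image being annotated
    x_center = int(params[1] * img_width)
    y_center = int(params[2] * img_height)
    box_w = int(params[3] * img_width)
    box_h = int(params[4] * img_height)
    draw.rectangle(
        [x_center - box_w // 2, y_center - box_h // 2,
         x_center + box_w // 2, y_center + box_h // 2],
        outline=color, width=3)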
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None): if split == 'train': model.train() else: model.eval() Loss, Acc = AverageMeter(), AverageMeter() preds = [] nIters = len(dataLoader) bar = Bar('{}'.format(opt.expID), max=nIters) for i, (input, target, _, meta) in enumerate(dataLoader): input_var = torch.autograd.Variable(input).float().cuda(opt.GPU) target_var = torch.autograd.Variable(target[0]).float().cuda(opt.GPU) output = model(input_var) if opt.DEBUG >= 2: gt = getPreds(target.cpu().numpy()) * 4 pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4 debugger = Debugger() img = (input[0].numpy().transpose(1, 2, 0) * 256).astype( np.uint8).copy() debugger.addImg(img) debugger.addPoint2D(pred[0], (255, 0, 0)) debugger.addPoint2D(gt[0], (0, 0, 255)) debugger.showAllImg(pause=True) loss = criterion(output[0], target_var) for k in range(1, opt.nStack): loss += criterion(output[k], target_var) Loss.update(loss.item(), input.size(0)) Acc.update( Accuracy((output[opt.nStack - 1].data).cpu().numpy(), (target_var.data).cpu().numpy())) if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() else: input_ = input.cpu().numpy() input_[0] = Flip(input_[0]).copy() inputFlip_var = torch.autograd.Variable( torch.from_numpy(input_).view(1, input_.shape[1], ref.inputRes, ref.inputRes)).float().cuda( opt.GPU) outputFlip = model(inputFlip_var) outputFlip = ShuffleLR( Flip((outputFlip[opt.nStack - 1].data).cpu().numpy()[0])).reshape( 1, ref.nJoints, ref.outputRes, ref.outputRes) output_ = old_div( ((output[opt.nStack - 1].data).cpu().numpy() + outputFlip), 2) preds.append( finalPreds(output_, meta['center'], meta['scale'], meta['rotate'])[0]) Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format( epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss, Acc=Acc, split=split) bar.next() bar.finish() return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
def createDataSet(file): path = os.path.abspath(file) pos = path.rfind('/') tokens = path[pos + 1:].split('_') descriptor_id = tokens[6] scene_name = tokens[2] scene_name = path[:pos] + '/' + scene_name + '_d.pcd' file_descriptor = path[:pos] + '/tmp' + descriptor_id + '.csv' labels = np.genfromtxt(file_descriptor, dtype='str', skip_header=1, delimiter=',') print('Affordances in descriptor %d' % labels.shape[0]) fileId = tokens[-1] tokens = fileId.split('.') fileId = tokens[0] print(fileId) res_data_file = path[:pos] + '/' + fileId + '_goodPointsX.pcd' res_points_file = path[:pos] + '/' + fileId + '_goodPoints.pcd' data = load_pcd_data(res_data_file, cols=None) #print(data.shape) points = load_pcd_data(res_points_file, cols=(0, 1, 2)) real_c_data = load_pcd_data(res_points_file, cols=(3, ), dataType=np.uint32) #real_c_data=np.array(colors[:,-1],dtype=np.int32) red = np.array((real_c_data >> 16) & 0x0000ff, dtype=np.uint8).reshape(-1, 1) green = np.array((real_c_data >> 8) & 0x0000ff, dtype=np.uint8).reshape(-1, 1) blue = np.array((real_c_data) & 0x0000ff, dtype=np.uint8).reshape(-1, 1) real_c_data = np.concatenate((red, green, blue), axis=1) perPoint = np.sum(real_c_data, axis=1) bounds = np.cumsum(perPoint) #print(bounds) howMany = np.zeros((labels.shape[0], 1), dtype=np.int32) all_data = np.zeros((data.shape[0], 6)) for i in range(all_data.shape[0]): point_id = np.nonzero(bounds > i)[0][0] all_data[i, :3] = points[point_id, :] all_data[i, 3:] = data[i, :3] for i in range(labels.shape[0]): success = np.nonzero(all_data[:, 3] == i)[0] success2 = np.nonzero(all_data[success, 2] > 0.3)[0] howMany[i] = success2.size ids_target = np.nonzero(howMany > n_samples)[0] print('Real found: %d' % ids_target.size) if n_orientations > 1: name = 'AffordancesDataset_augmented_names.txt' else: name = 'AffordancesDataset_names.txt' with open(name, "w") as text_file: for i in range(ids_target.shape[0]): text_file.write( "%d:%s-%s\n" % (i, labels[ids_target[i], 0], labels[ids_target[i], 2])) # #print(labels[ids_target,1:]) all_points = np.zeros((ids_target.size, n_samples, 3)) all_points_score = np.zeros((ids_target.size, n_samples)) for i in range(ids_target.shape[0]): #get the 3D point for the response success = np.nonzero((all_data[:, 3] == ids_target[i]) & (all_data[:, 2] > 0.3))[0] sorted_ids = np.argsort(all_data[success, 5]) print( 'Sampling for %s %s in %d points(%f,%f)' % (labels[ids_target[i], 0], labels[ids_target[i], 2], success.size, np.max(all_data[success, 5]), np.min(all_data[success, 5]))) sorted_ids = sorted_ids[::-1] for j in range(n_samples): all_points[i, j, :] = all_data[success[sorted_ids[j]], :3] all_points_score[i, j] = all_data[success[sorted_ids[j]], 5] #print('Min %f max %f'%(all_points_score[i,0],all_points_score[i,-1])) labels_d = np.arange(ids_target.size) print( 'Sampled points maxZ %f minZ %f' % (np.max(all_points[:, :, 2].reshape( 1, -1)), np.min(all_points[:, :, 2].reshape(1, -1)))) #sys.exit() if n_orientations > 1: name = 'dataPointsAffordances_augmented.h5' else: name = 'dataPointsAffordances.h5' if os.path.exists(name): os.system('rm %s' % (name)) save_h5(name, all_points, labels_d, 'float32', 'uint8') #get dense cloud dense_sceneCloud = pypcd.PointCloud.from_path(scene_name).pc_data pc_array = np.array([[x, y, z] for x, y, z in dense_sceneCloud]) #generate pointclouds that were not detected to test against single example training good_points_file = path[:pos] + '/' + fileId + '_goodPointsIds.pcd' sampled_points_file = path[:pos] + '/' + fileId + '_samplePointsIds.pcd' 
    sampled_ids = np.sort(
        load_pcd_data(sampled_points_file, cols=(0, ), dataType=np.int32))
    good_ids = np.sort(
        load_pcd_data(good_points_file, cols=(0, ), dataType=np.int32))
    non_affordance = np.setdiff1d(np.arange(sampled_ids.shape[0]), good_ids)
    sampled_points_file = path[:pos] + '/' + fileId + '_samplePoints.pcd'
    sampled_points = load_pcd_data(sampled_points_file, cols=(0, 1, 2))
    # shuffle negative example ids
    np.random.shuffle(non_affordance)
    print('Getting 1024 negative examples')
    bar = Bar('Processing', max=1024)
    negative_examples = np.zeros((1024, n_points, 3), dtype=np.float32)
    for i in range(1024):
        point = pc_array[non_affordance[i], ...]
        voxel = getVoxel(point, max_rad, pc_array)
        minP = np.min(voxel, 0)
        maxP = np.max(voxel, 0)
        dist = np.linalg.norm(maxP - minP, axis=0) / 2
        print('RAD %f rad %f estimation %f' %
              (dist, max_rad, max_rad * np.sqrt(3)))
        sample = sample_cloud(voxel, n_points)
        negative_examples[i, ...] = sample
        bar.next()
    bar.finish()
    negative_labels = 100 * np.ones((1024, 1), dtype=np.uint8)
    print('Got %d negative examples' % (negative_examples.shape[0]))
    print(negative_examples[0, 0, :])
    name = 'AffordancesDataset_negatives.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, negative_examples, negative_labels, 'float32', 'uint8')
    # sys.exit()
    print('Sampling actual voxels from %s of %d points' %
          (scene_name, pc_array.shape[0]))
    dataSet_data = np.zeros(
        (all_points.shape[0] * all_points.shape[1] * n_orientations,
         n_points, 3), dtype=np.float32)
    dataSet_labels = np.zeros(
        (all_points.shape[0] * all_points.shape[1] * n_orientations, 1),
        dtype=np.uint8)
    print(dataSet_data.shape)
    count = 0
    # data_type 0 -> centered
    data_type = 1
    # extract voxels and pointclouds for dataset
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # ax.hold(False) was removed in Matplotlib 3.0 and is no longer needed
    for aff in range(all_points.shape[0]):
        print('Training examples for %s %s' %
              (labels[ids_target[aff], 0], labels[ids_target[aff], 2]))
        bar = Bar('Processing', max=all_points.shape[1])
        for n_sample in range(all_points.shape[1]):
            point = all_points[aff, n_sample, :].reshape(3, -1)
            # print(point.shape)
            voxel = getVoxel(point, max_rad, pc_array)
            if voxel.shape[0] < n_points:
                # 'aVoxel' was undefined in the original; pad the voxel with
                # copies of the centre point (as the sampling code elsewhere
                # in this file does) so the sample always has n_points rows
                padding = point.reshape(1, 3) + np.zeros(
                    (n_points - voxel.shape[0], 3), dtype=np.float32)
                sample = np.concatenate((padding, voxel), axis=0)
            else:
                sample = sample_cloud(voxel, n_points)
            if data_type == 0:
                centered_sample = sample - point
            else:
                centered_sample = sample
            # rotate this voxel n_orientations times around Z (up)
            for j in range(n_orientations):
                rotated_voxel = rotate_point_cloud_by_angle(
                    np.expand_dims(centered_sample, axis=0),
                    j * 2 * np.pi / n_orientations).squeeze()
                dataSet_data[count, ...] = rotated_voxel
                dataSet_labels[count] = labels_d[aff]
                count += 1
                if n_sample == 0:
                    ax.scatter(rotated_voxel[:, 0], rotated_voxel[:, 1],
                               rotated_voxel[:, 2], s=3)
                    plt.pause(0.2)
                    plt.draw()
            bar.next()
        bar.finish()
    if n_orientations > 1:
        name = 'AffordancesDataset_augmented.h5'
    else:
        name = 'AffordancesDataset.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, dataSet_data, dataSet_labels, 'float32', 'uint8')
def validation(model, val_loader, epoch, writer): # set evaluate mode model.eval() total_correct, total_label = 0, 0 total_correct_hb, total_label_hb = 0, 0 total_correct_fb, total_label_fb = 0, 0 hist = np.zeros((args.num_classes, args.num_classes)) hist_hb = np.zeros((args.hbody_cls, args.hbody_cls)) hist_fb = np.zeros((args.fbody_cls, args.fbody_cls)) # Iterate over data. bar = Bar('Processing {}'.format('val'), max=len(val_loader)) bar.check_tty = False for idx, batch in enumerate(val_loader): image, target, hlabel, flabel, _ = batch image, target, hlabel, flabel = image.cuda(), target.cuda( ), hlabel.cuda(), flabel.cuda() with torch.no_grad(): h, w = target.size(1), target.size(2) outputs = model(image) outputs = gather(outputs, 0, dim=0) preds = F.interpolate(input=outputs[0][-1], size=(h, w), mode='bilinear', align_corners=True) preds_hb = F.interpolate(input=outputs[1][-1], size=(h, w), mode='bilinear', align_corners=True) preds_fb = F.interpolate(input=outputs[2][-1], size=(h, w), mode='bilinear', align_corners=True) if idx % 50 == 0: img_vis = inv_preprocess(image, num_images=args.save_num) label_vis = decode_predictions(target.int(), num_images=args.save_num, num_classes=args.num_classes) pred_vis = decode_predictions(torch.argmax(preds, dim=1), num_images=args.save_num, num_classes=args.num_classes) # visual grids img_grid = torchvision.utils.make_grid( torch.from_numpy(img_vis.transpose(0, 3, 1, 2))) label_grid = torchvision.utils.make_grid( torch.from_numpy(label_vis.transpose(0, 3, 1, 2))) pred_grid = torchvision.utils.make_grid( torch.from_numpy(pred_vis.transpose(0, 3, 1, 2))) writer.add_image('val_images', img_grid, epoch * len(val_loader) + idx + 1) writer.add_image('val_labels', label_grid, epoch * len(val_loader) + idx + 1) writer.add_image('val_preds', pred_grid, epoch * len(val_loader) + idx + 1) # pixelAcc correct, labeled = batch_pix_accuracy(preds.data, target) correct_hb, labeled_hb = batch_pix_accuracy(preds_hb.data, hlabel) correct_fb, labeled_fb = batch_pix_accuracy(preds_fb.data, flabel) # mIoU hist += fast_hist(preds, target, args.num_classes) hist_hb += fast_hist(preds_hb, hlabel, args.hbody_cls) hist_fb += fast_hist(preds_fb, flabel, args.fbody_cls) total_correct += correct total_correct_hb += correct_hb total_correct_fb += correct_fb total_label += labeled total_label_hb += labeled_hb total_label_fb += labeled_fb pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label) IoU = round(np.nanmean(per_class_iu(hist)) * 100, 2) pixAcc_hb = 1.0 * total_correct_hb / (np.spacing(1) + total_label_hb) IoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2) pixAcc_fb = 1.0 * total_correct_fb / (np.spacing(1) + total_label_fb) IoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2) # plot progress bar.suffix = '{} / {} | pixAcc: {pixAcc:.4f}, mIoU: {IoU:.4f} |' \ 'pixAcc_hb: {pixAcc_hb:.4f}, mIoU_hb: {IoU_hb:.4f} |' \ 'pixAcc_fb: {pixAcc_fb:.4f}, mIoU_fb: {IoU_fb:.4f}'.format(idx + 1, len(val_loader), pixAcc=pixAcc, IoU=IoU, pixAcc_hb=pixAcc_hb, IoU_hb=IoU_hb, pixAcc_fb=pixAcc_fb, IoU_fb=IoU_fb) bar.next() print('\n per class iou part: {}'.format(per_class_iu(hist) * 100)) print('per class iou hb: {}'.format(per_class_iu(hist_hb) * 100)) print('per class iou fb: {}'.format(per_class_iu(hist_fb) * 100)) mIoU = round(np.nanmean(per_class_iu(hist)) * 100, 2) mIoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2) mIoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2) writer.add_scalar('val_pixAcc', pixAcc, epoch) writer.add_scalar('val_mIoU', mIoU, 
epoch) writer.add_scalar('val_pixAcc_hb', pixAcc_hb, epoch) writer.add_scalar('val_mIoU_hb', mIoU_hb, epoch) writer.add_scalar('val_pixAcc_fb', pixAcc_fb, epoch) writer.add_scalar('val_mIoU_fb', mIoU_fb, epoch) bar.finish() return pixAcc, mIoU
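# fast_hist and per_class_iu above are the standard semantic-segmentation
# confusion-histogram helpers. A common definition, shown here as a sketch;
# the project's own versions may differ, e.g. by accepting logits tensors
# and taking the argmax internally:
import numpy as np

def fast_hist(pred, label, n):
    # n x n confusion matrix between flattened prediction and label maps
    k = (label >= 0) & (label < n)
    return np.bincount(n * label[k].astype(int) + pred[k],
                       minlength=n ** 2).reshape(n, n)

def per_class_iu(hist):
    # IoU per class: diagonal / (row sum + column sum - diagonal)
    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))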
def sampleFromFile(affordance, list_of_files, number_of_samples, pointsPerCloud=4096): file_options = np.arange(len(list_of_files)) files_to_sample = np.random.randint(len(list_of_files), size=(1, number_of_samples)) repeated = np.bincount(files_to_sample[0, :], minlength=len(list_of_files)) actually_sample_files = np.nonzero(repeated)[0] dataPoints = np.empty((number_of_samples, 6), dtype=np.float) dataClouds = np.empty((number_of_samples, pointsPerCloud, 3), dtype=np.float32) start_id = 0 actually_sampled = 0 outOfPoints = False bar = Bar('Sampling ', max=number_of_samples) for i in range(actually_sample_files.size): file = list_of_files[actually_sample_files[i]] + "_newData.csv" pos = file.rfind('/') + 1 if "space/" in file: #Need to search for the exact file pos_id = list_of_files[actually_sample_files[i]].rfind('/') + 1 target_file_id = list_of_files[actually_sample_files[i]][pos_id:] path_to_scene = file[: pos_id] + 'All_affordances_*_' + target_file_id + '.pcd' someFile = glob.glob(path_to_scene) tokens = someFile[0].split('_') cloud_file = list_of_files[ actually_sample_files[i]][:pos_id] + tokens[2] if "real" in tokens[2]: cloud_file = cloud_file + ".pcd" else: cloud_file = cloud_file + "_d.pcd" #if "readingroom" in cloud_file: #print(list_of_files[actually_sample_files[i]]) #print(cloud_file) #sys.exit() else: pos_id = list_of_files[actually_sample_files[i]].rfind('/') + 1 target_file_id = list_of_files[actually_sample_files[i]][pos_id:] if "DATA" in file[:pos_id]: path_to_scene = file[:pos_id] + '*_clean.pcd' someFile = glob.glob(path_to_scene) cloud_file = someFile[0] else: path_to_scene = file[: pos_id] + 'All_affordances_*_' + target_file_id + '.pcd' someFile = glob.glob(path_to_scene) tokens = someFile[0].split('_') cloud_file = list_of_files[ actually_sample_files[i]][:pos_id] + tokens[2] + '.pcd' #print(cloud_file) #sys.exit() sample_from_file = repeated[actually_sample_files[i]] data = np.genfromtxt(file, delimiter=",", dtype='float32') target_ids = np.nonzero(data[:, A_ID].astype(int) == affordance)[0] sorted_subset = np.argsort(data[target_ids, SCORE]) sorted_subset = sorted_subset[::-1] j = 0 k = 0 complete_sample = False if not os.path.exists(cloud_file): print('No input cloud %s' % (cloud_file)) return np.empty((0, 6)), np.empty((0, 0, 0)) cloud, _ = load_pcd_data_binary(cloud_file) kdt = BallTree(cloud, leaf_size=5, metric='euclidean') while not complete_sample: #take points until conplete set dataPoints[start_id + j, :] = data[target_ids[sorted_subset[k]], :] point = dataPoints[start_id + j, :3] voxel_ids = getVoxel(point, max_rad, kdt) voxel = cloud[voxel_ids, :] actual_voxel_size = voxel.shape[0] if actual_voxel_size < (pointsPerCloud / 4): #bad point, get a new one if k == 0: print("\n File %s" % (cloud_file)) outputText = "Voxel " + str( voxel.shape[0]) + " " + str(k) + "/" + str( sorted_subset.shape[0]) print(outputText, end='\r') #print('\nFile: %s bad point %d/%d\r'%(someFile[0],k,sorted_subset.shape[0])) #print('bad point %d of %d Voxel: %d'%(k,sorted_subset.shape[0],voxel.shape[0])) k += 1 if k >= sorted_subset.shape[0]: outOfPoints = True print('Exhausted File') break else: if actual_voxel_size >= pointsPerCloud: sample = sample_cloud(voxel, pointsPerCloud) else: print('padding') padding = point + np.zeros( (pointsPerCloud - actual_voxel_size, 3), dtype=np.float32) sample = np.concatenate((padding, voxel), axis=0) #center cloud dataClouds[start_id + j, ...] 
= sample - point j += 1 #print('\tVoxel size (%d,%d) SampleSize(%d,%d) start_id %d +j %d'%(voxel.shape[0],voxel.shape[1],sample.shape[0],sample.shape[1],start_id,j)) if j == sample_from_file: complete_sample = True if not outOfPoints: start_id += sample_from_file actually_sampled += sample_from_file bar.next(sample_from_file) else: break bar.finish() if outOfPoints or actually_sampled != number_of_samples: return np.empty((0, 6)), np.empty((0, 0, 0)) else: return dataPoints, dataClouds
def mostImportantFormat(output_path, pages): # Counter to store images of each page of PDF to image image_counter = 1 # Iterate through all the pages stored above for page in pages: # Declaring filename for each page of PDF as JPG filename = os.path.join(output_path, "page_" + str(image_counter) + ".jpg") # Save the image of the page in system page.save(filename, 'JPEG') # Increment the counter to update filename image_counter = image_counter + 1 # Variable to get count of total number of pages filelimit = image_counter - 1 # Creating a text file to write the output outfile = "out_text0.txt" f = open(outfile, "a") #Progress Bar bar = Bar('Processing', max=filelimit) # Iterate from 1 to total number of pages for i in range(1, filelimit + 1): filepath = os.path.join(output_path, "page_" + str(i) + ".jpg") # load the original image image = cv2.imread(filepath) # convert the image to black and white for better OCR ret, thresh1 = cv2.threshold(image, 120, 255, cv2.THRESH_BINARY) # pytesseract image to string to get results text = str(pytesseract.image_to_string(thresh1, config='--psm 6')) # Split the entire text into lines and store in a list arr = text.split("\n") # Flag to check when to start parsing lines start_flag = False for each in arr: # If the line has gender/age texts then print it if "Gender/Age" in each and i == 1: f.write(each + "\n") continue # If the line has Name then print it and start parsing lines from here onwards if "Name" in each and "Value" not in each: start_flag = True if i == 1: f.write(each + "\n") continue # If the line is a valid row print it, else move to next if start_flag: row_arr = each.split() if isValidRow(row_arr): f.write(each + "\n") # Increment the terminal progress bar bar.next() try: #Delete all created images shutil.rmtree(output_path) os.mkdir(output_path) except Exception as e: print("Error occurred while deleting images : " + str(e)) bar.finish() # Close the file after writing all the text. f.close()
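# mostImportantFormat() expects 'pages' to be a list of PIL images, one per
# PDF page (each supports .save(..., 'JPEG')). A typical way to build that
# list is pdf2image; the PDF path and output directory below are placeholders,
# not taken from the original.
import os
from pdf2image import convert_from_path

output_path = "page_images"
os.makedirs(output_path, exist_ok=True)
pages = convert_from_path("report.pdf", dpi=300)  # list of PIL.Image objects
mostImportantFormat(output_path, pages)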
def run_epoch(self, phase, epoch, data_loader, rank): model_with_loss = self.model_with_loss if phase == 'train': model_with_loss.train() else: model_with_loss.eval() torch.cuda.empty_cache() results = {} data_time, batch_time = AverageMeter(), AverageMeter() avg_loss_stats = { l: AverageMeter() for l in self.loss_stats if l in ('tot', 'hm', 'wh', 'tracking') } num_iters = len( data_loader ) if self.args.num_iters[phase] < 0 else self.args.num_iters[phase] bar = Bar('{}'.format("tracking"), max=num_iters) end = time.time() for iter_id, batch in enumerate(data_loader): if iter_id >= num_iters: break data_time.update(time.time() - end) for k in batch: if k in ('fpath', 'prev_fpath'): continue if type(batch[k]) != list: batch[k] = batch[k].to(self.args.device, non_blocking=True) else: for i in range(len(batch[k])): batch[k][i] = batch[k][i].to(self.args.device, non_blocking=True) output, loss, loss_stats = model_with_loss(batch) loss = loss.mean() if phase == 'train': self.optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm_(model_with_loss.parameters(), self.args.clip_value) self.optimizer.step() batch_time.update(time.time() - end) end = time.time() Bar.suffix = '{phase}: [{0}][{1}/{2}]| '.format(epoch, iter_id, num_iters, phase=phase) for l in avg_loss_stats: avg_loss_stats[l].update(loss_stats[l].mean().item(), batch['image'].size(0)) Bar.suffix = Bar.suffix + '|{} {:.4f} '.format( l, avg_loss_stats[l].avg) Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) if rank == 0 and phase == 'val' and self.args.write_mota_metrics and epoch in self.args.save_point: curr_name = None tracker = None for i in range(self.args.batch_size): try: fpath = batch['fpath'][i] except IndexError: break fpath = fpath.split('.')[0].split('/')[-1] name, num = fpath.split("_frame_") num = int(num) if num % self.args.val_select_frame != 0: continue if name != curr_name: curr_name = name tracker = Tracker(self.args) out = [x[i][None] for x in output] res = out dets = generic_decode( {k: res[t] for (t, k) in enumerate(self.args.heads)}, self.args.max_objs, self.args) for k in dets: dets[k] = dets[k].detach().cpu().numpy() if not tracker.init and len(dets) > 0: tracker.init_track(dets) elif len(dets) > 0: tracker.step(dets) with open(os.path.join(self.args.res_dir, fpath + '.txt'), "w") as f: for track in tracker.tracks: x1, y1, x2, y2 = track['bbox'] f.write("{} {} {} {} {} {}\n".format( track['score'], track['tracking_id'], x1, y1, x2, y2)) if rank == 0 and self.args.print_iter > 0: # If not using progress bar if iter_id % self.args.print_iter == 0: print('{}| {}'.format("tracking", Bar.suffix)) else: bar.next() del output, loss, loss_stats if rank == 0 and phase == 'val' and self.args.write_mota_metrics and epoch in self.args.save_point: self.compute_map(epoch) bar.finish() ret = {k: v.avg for k, v in avg_loss_stats.items()} ret['time'] = bar.elapsed_td.total_seconds() / 60. return ret, results
def save_samples(self, purpose_hdf5_group: h5py.Group, logdir: pathlib.Path): logdir.mkdir(exist_ok=True, parents=True) data_hdf5_group = purpose_hdf5_group[f"data"] dataset_length = len(data_hdf5_group[ChannelEnum.REC_DEM.value]) num_samples = int(dataset_length / self.config["sample_frequency"]) progress_bar = Bar(f"Plot samples for {str(purpose_hdf5_group.name)}", max=num_samples) for sample_idx in range(num_samples): idx = sample_idx * self.config["sample_frequency"] res_grid = data_hdf5_group[ChannelEnum.RES_GRID.value][idx, ...] rel_position = data_hdf5_group[ChannelEnum.REL_POSITION.value][idx, ...] rec_dem = data_hdf5_group[ChannelEnum.REC_DEM.value][idx, ...] occluded_elevation_map = data_hdf5_group[ ChannelEnum.OCC_DEM.value][idx, ...] comp_dem = data_hdf5_group[ChannelEnum.COMP_DEM.value][idx, ...] gt_dem = None if ChannelEnum.GT_DEM.value in data_hdf5_group: gt_dem = data_hdf5_group[ChannelEnum.GT_DEM.value][idx, ...] non_occluded_elevation_map = occluded_elevation_map[ ~np.isnan(occluded_elevation_map)] rec_data_um = None if ChannelEnum.REC_DATA_UM.value in data_hdf5_group: rec_data_um = data_hdf5_group[ChannelEnum.REC_DATA_UM.value][ idx, ...] comp_data_um = None if ChannelEnum.COMP_DATA_UM.value in data_hdf5_group: comp_data_um = data_hdf5_group[ChannelEnum.COMP_DATA_UM.value][ idx, ...] model_um = None if ChannelEnum.MODEL_UM.value in data_hdf5_group: model_um = data_hdf5_group[ChannelEnum.MODEL_UM.value][idx, ...] total_um = None if ChannelEnum.TOTAL_UM.value in data_hdf5_group: total_um = data_hdf5_group[ChannelEnum.TOTAL_UM.value][idx, ...] rec_dems = None if ChannelEnum.REC_DEMS.value in data_hdf5_group: rec_dems = data_hdf5_group[ChannelEnum.REC_DEMS.value][idx, ...] comp_dems = None if ChannelEnum.COMP_DEMS.value in data_hdf5_group: comp_dems = data_hdf5_group[ChannelEnum.COMP_DEMS.value][idx, ...] 
            u = int(
                round(occluded_elevation_map.shape[0] / 2 +
                      rel_position[0] / res_grid[0]))
            v = int(
                round(occluded_elevation_map.shape[1] / 2 +
                      rel_position[1] / res_grid[1]))

            # we only visualize the robot position if it is inside the elevation map
            plot_robot_position = 0 < u < occluded_elevation_map.shape[0] \
                and 0 < v < occluded_elevation_map.shape[1]
            if plot_robot_position:
                robot_position_pixel = np.array([u, v])
            else:
                robot_position_pixel = None

            indiv_vranges = self.config.get("indiv_vranges", True)

            # 2D
            if indiv_vranges is False:
                elevation_vmin = np.min(
                    [np.min(rec_dem), np.min(comp_dem[~np.isnan(comp_dem)])])
                elevation_vmax = np.max(
                    [np.max(rec_dem), np.max(comp_dem[~np.isnan(comp_dem)])])
                if non_occluded_elevation_map.size != 0:
                    elevation_vmin = np.min(
                        [elevation_vmin, np.min(non_occluded_elevation_map)])
                    elevation_vmax = np.max(
                        [elevation_vmax, np.max(non_occluded_elevation_map)])
                # the original tested 'np.isnan(gt_dem).all() is False', which
                # never matches because .all() returns a NumPy bool, not the
                # builtin False; use 'not' instead
                if gt_dem is not None and not np.isnan(gt_dem).all():
                    ground_truth_dem_vmin = np.min(gt_dem[~np.isnan(gt_dem)])
                    ground_truth_dem_vmax = np.max(gt_dem[~np.isnan(gt_dem)])
                    elevation_vmin = np.min([elevation_vmin, ground_truth_dem_vmin])
                    elevation_vmax = np.max([elevation_vmax, ground_truth_dem_vmax])
            else:
                elevation_vmin = None
                elevation_vmax = None

            elevation_cmap = plt.get_cmap("viridis")
            fig, axes = plt.subplots(nrows=2, ncols=2, figsize=[12, 10])
            # axes = np.expand_dims(axes, axis=0)
            if gt_dem is not None:
                axes[0, 0].set_title("Ground-truth")
                # matshow plots x and y swapped
                mat = axes[0, 0].matshow(np.swapaxes(gt_dem, 0, 1),
                                         vmin=elevation_vmin,
                                         vmax=elevation_vmax,
                                         cmap=elevation_cmap)
                if indiv_vranges:
                    fig.colorbar(mat, ax=axes[0, 0], fraction=0.08)

            axes[0, 1].set_title("Reconstruction")
            # matshow plots x and y swapped
            mat = axes[0, 1].matshow(np.swapaxes(rec_dem, 0, 1),
                                     vmin=elevation_vmin, vmax=elevation_vmax,
                                     cmap=elevation_cmap)
            if indiv_vranges:
                fig.colorbar(mat, ax=axes[0, 1], fraction=0.08)

            axes[1, 0].set_title("Composition")
            # matshow plots x and y swapped
            mat = axes[1, 0].matshow(np.swapaxes(comp_dem, 0, 1),
                                     vmin=elevation_vmin, vmax=elevation_vmax,
                                     cmap=elevation_cmap)
            if indiv_vranges:
                fig.colorbar(mat, ax=axes[1, 0], fraction=0.08)

            axes[1, 1].set_title("Occlusion")
            # matshow plots x and y swapped
            mat = axes[1, 1].matshow(np.swapaxes(occluded_elevation_map, 0, 1),
                                     vmin=elevation_vmin, vmax=elevation_vmax,
                                     cmap=elevation_cmap)
            if indiv_vranges:
                fig.colorbar(mat, ax=axes[1, 1], fraction=0.08)

            if indiv_vranges is False:
                fig.colorbar(mat, ax=axes.ravel().tolist(), fraction=0.045)

            for i, ax in enumerate(axes.reshape(-1)):
                if plot_robot_position:
                    ax.plot([u], [v], marker="*", color="red")
                # Hide grid lines
                ax.grid(False)

            plt.draw()
            plt.savefig(str(logdir / f"sample_2d_{idx}.pdf"))
            if self.remote is not True:
                plt.show()
            plt.close()

            # 3D
            fig = plt.figure(figsize=[2 * 6.4, 1 * 4.8])
            plt.clf()
            axes = []
            num_cols = 3
            x_3d = np.arange(
                start=-int(occluded_elevation_map.shape[0] / 2),
                stop=int(occluded_elevation_map.shape[0] / 2)) * res_grid[0]
            y_3d = np.arange(
                start=-int(occluded_elevation_map.shape[1] / 2),
                stop=int(occluded_elevation_map.shape[1] / 2)) * res_grid[1]
            x_3d, y_3d = np.meshgrid(x_3d, y_3d)

            axes.append(fig.add_subplot(100 + num_cols * 10 + 1, projection="3d"))
            # the np.NaNs in the occluded elevation maps give us these warnings:
            warnings.filterwarnings("ignore", category=UserWarning)
            if gt_dem is not None:
                axes[0].set_title("Ground-truth")
                axes[0].plot_surface(x_3d, y_3d, gt_dem, vmin=elevation_vmin,
                                     vmax=elevation_vmax, cmap=elevation_cmap)
            axes.append(fig.add_subplot(100 + num_cols * 10 + 2, projection="3d"))
            axes[1].set_title("Reconstruction")
            axes[1].plot_surface(x_3d, y_3d, rec_dem, vmin=elevation_vmin,
                                 vmax=elevation_vmax, cmap=elevation_cmap)
            axes.append(fig.add_subplot(100 + num_cols * 10 + 3, projection="3d"))
            axes[2].set_title("Occlusion")
            axes[2].plot_surface(x_3d, y_3d, occluded_elevation_map,
                                 vmin=elevation_vmin, vmax=elevation_vmax,
                                 cmap=elevation_cmap)
            warnings.filterwarnings("default", category=UserWarning)
            fig.colorbar(mat, ax=axes, fraction=0.015)

            for i, ax in enumerate(axes):
                if plot_robot_position:
                    ax.scatter([rel_position[0]], [rel_position[1]],
                               [rel_position[2]], marker="*", color="red")
                ax.set_xlabel("x [m]")
                ax.set_ylabel("y [m]")
                ax.set_zlabel("z [m]")
                # Hide grid lines
                ax.grid(False)

            plt.draw()
            plt.savefig(str(logdir / f"sample_3d_{idx}.pdf"))
            if self.remote is not True:
                plt.show()
            plt.close()

            if gt_dem is not None \
                    or rec_data_um is not None or model_um is not None:
                draw_error_uncertainty_plot(
                    idx, logdir, gt_dem=gt_dem, rec_dem=rec_dem,
                    comp_dem=comp_dem, rec_data_um=rec_data_um,
                    comp_data_um=comp_data_um, model_um=model_um,
                    total_um=total_um,
                    robot_position_pixel=robot_position_pixel,
                    remote=self.remote, indiv_vranges=indiv_vranges)

            if rec_dems is not None:
                draw_solutions_plot(idx, logdir, ChannelEnum.REC_DEMS, rec_dems,
                                    robot_position_pixel=robot_position_pixel,
                                    remote=self.remote)
            if comp_dems is not None:
                # the original passed rec_dems here, an apparent copy-paste slip
                draw_solutions_plot(idx, logdir, ChannelEnum.COMP_DEMS, comp_dems,
                                    robot_position_pixel=robot_position_pixel,
                                    remote=self.remote)

            if ChannelEnum.REC_TRAV_RISK_MAP.value in data_hdf5_group \
                    and ChannelEnum.COMP_TRAV_RISK_MAP.value in data_hdf5_group:
                rec_trav_risk_map = data_hdf5_group[
                    ChannelEnum.REC_TRAV_RISK_MAP.value][idx, ...]
                comp_trav_risk_map = data_hdf5_group[
                    ChannelEnum.COMP_TRAV_RISK_MAP.value][idx, ...]
                draw_traversability_plot(
                    idx, logdir, gt_dem=gt_dem, rec_dem=rec_dem,
                    comp_dem=comp_dem, rec_data_um=rec_data_um,
                    comp_data_um=comp_data_um, model_um=model_um,
                    total_um=total_um, rec_trav_risk_map=rec_trav_risk_map,
                    comp_trav_risk_map=comp_trav_risk_map,
                    robot_position_pixel=robot_position_pixel,
                    remote=self.remote)

            progress_bar.next()
        progress_bar.finish()
def magic_eight_ball():
    responses = [
        "There is never enough time in the morning. Try to combine brushing your teeth with your breakfast.",
        "A sticking plaster can heal any wound. You just have to believe.",
        "Floss. It's more important than you would think.",
        "You should probably drink more water.",
        "You should consider buying a plunger before you need a plunger.",
        "You know what? You should earn more than you show, speak less than you know.",
        "Hahahahaha",
        "Once a week, take a bath in Epsom salts, and if you can, add half a cup of baking soda and some essential oil such as lavender.",
        "When exercising, count backwards. For example, if you are carrying out 20 sit-ups, don't count from 1 to 20; start at 20 and count backwards as you do them.",
        "Start listening to your gut instinct. It's always right.",
        "Never give anyone more than 2 chances.",
        "Wear sunscreen, even if you think you don't need it.",
        "If you can do something in less than 5 minutes, do it now.",
        "Always strive to stand and sit with good posture.",
        "Just have fun.",
        "To be idle is to be foolish.",
        "You might want to run, but you should stay and fight.",
        "Face the truth with dignity.",
        "Travel is in your future.",
        "Don't wait for success to come - go find it!"
    ]
    question = input(
        "Hi, enter your question\n or...\n Enter F to crack your fortune cookie\n"
        " Enter Q to quit game ").upper()
    if question == "Q":
        return "Come back again soon"
    # the three answer branches shared the same spinner, so it runs once here
    bar = Bar('Processing', max=20, suffix='%(percent)d%%')
    for i in range(20):
        time.sleep(.15)
        bar.next()
    bar.finish()
    if question != "F" and len(question) < 10:
        print("Invalid input")
    else:
        print(random.choice(responses))
    again = input(
        "Play again?\n Enter 'Yes' to continue or...\n 'No' to exit game ").upper()
    if again == "YES":
        return magic_eight_ball()
    return "come back again"
def verify_batch_consumer_performance(): """ Verify batch Consumer performance """ conf = { 'bootstrap.servers': bootstrap_servers, 'group.id': uuid.uuid1(), 'session.timeout.ms': 6000, 'error_cb': error_cb, 'auto.offset.reset': 'earliest' } c = confluent_kafka.Consumer(conf) def my_on_assign(consumer, partitions): print('on_assign:', len(partitions), 'partitions:') for p in partitions: print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset)) consumer.assign(partitions) def my_on_revoke(consumer, partitions): print('on_revoke:', len(partitions), 'partitions:') for p in partitions: print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset)) consumer.unassign() c.subscribe([topic], on_assign=my_on_assign, on_revoke=my_on_revoke) max_msgcnt = 1000000 bytecnt = 0 msgcnt = 0 batch_size = 1000 print('Will now consume %d messages' % max_msgcnt) if with_progress: bar = Bar('Consuming', max=max_msgcnt, suffix='%(index)d/%(max)d [%(eta_td)s]') else: bar = None while msgcnt < max_msgcnt: # Consume until we hit max_msgcnt msglist = c.consume(num_messages=batch_size, timeout=20.0) for msg in msglist: if msg.error(): raise confluent_kafka.KafkaException(msg.error()) bytecnt += len(msg) msgcnt += 1 if bar is not None and (msgcnt % 10000) == 0: bar.next(n=10000) if msgcnt == 1: t_first_msg = time.time() if bar is not None: bar.finish() if msgcnt > 0: t_spent = time.time() - t_first_msg print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' % (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent, (bytecnt / t_spent) / (1024 * 1024))) print('closing consumer') c.close()
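# The consumer above customizes the Bar suffix with the printf-style fields
# that progress.bar exposes (index, max, percent, eta_td, elapsed_td, avg, ...)
# and advances the bar in batches. A tiny self-contained demo of the same
# pattern, independent of Kafka:
import time
from progress.bar import Bar

bar = Bar('Demo', max=500, suffix='%(index)d/%(max)d [%(eta_td)s]')
for _ in range(50):
    time.sleep(0.01)
    bar.next(n=10)  # advance in batches, as the consumer does every 10000 msgs
bar.finish()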
# Start progress bar. max obtained by counting the rows in the dataset file
indexing_progress_bar = Bar("Reading in documents to train Word2Vec Model",
                            max=NUM_DOCS)

# Read in CSV dataset and skip the header row
csv_reader = csv.reader(csvfile)
next(csv_reader, None)

# Each row represents one document
for row in csv_reader:
    # join title, content and court with spaces for the training data,
    # so words do not merge at field boundaries
    data = ' '.join((row[1], row[2], row[4]))
    sentences.append(cleaner.clean(data))
    # Update progress bar
    indexing_progress_bar.next()

# End time
end = time.time()
# Time taken
print(f"Time taken to index is {(end-start):.2f}s")

train_start = time.time()
# Progress bar finish
indexing_progress_bar.finish()
print("Starting training...")
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)
train_end = time.time()
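# A short usage sketch for the model trained above: query the learned vectors
# through model.wv (the gensim 4.x API, matching the vector_size argument
# used here). The word 'court' is just an illustrative token; any word that
# survived min_count works.
model.save("word2vec.model")              # persist for later queries
vector = model.wv["court"]                # 100-dimensional vector for one token
similar = model.wv.most_similar("court", topn=5)
print(similar)                            # [(word, cosine similarity), ...]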
def traintriandcls(mymodel,epoch,cuda_gpu,optimizer,mytraindata,scheduler): global min_loss batch_timer = AverageMeter() data_timer = AverageMeter() prec_losses = AverageMeter() acc_avg = AverageMeter() print('epoch {}'.format(epoch + 1)) trainloss=0. record=0 mytraindata.create_triplet_classbased_1(mymodel,args) mytrainloader = torch.utils.data.DataLoader(mytraindata, batch_size=args.batch_size, shuffle=True,num_workers=50) mymodel.train() bar = Bar('[{}]{}'.format('base-GGLM', 'train'), max=len(mytrainloader)) since = time.time() for index, ((img1,label1), (img2,label2), (img3,label3)) in enumerate(mytrainloader): data_timer.update(time.time() - since) if cuda_gpu: img1 = img1.cuda() label1=label1.cuda() img2 = img2.cuda() label2=label2.cuda() img3 = img3.cuda() label3=label3.cuda() img1 = img1.float() img2 = img2.float() img3 = img3.float() img1, img2, img3 = Variable(img1), Variable(img2), Variable(img3) optimizer.zero_grad() try: o1=mymodel(img1) o2=mymodel(img2) o3=mymodel(img3) out1=o1['out'];f1=o1['feature']; out2=o2['out'];f2=o2['feature']; out3=o3['out'];f3=o3['feature']; if args.balancedgpu: tloss = DataParallelCriterion(buildLoss.TripletLoss()) else: tloss=buildLoss.TripletLoss() tripletloss1 = tloss(f1, f2, f3) #tripletloss2=tloss(out1,out2,out3) #loss = 0.8*tripletloss1+0.2*tripletloss2 loss=tripletloss1 if loss.item() > 0: trainloss += loss.item() record += 1 loss.backward() optimizer.step() batch_timer.update(time.time() - since) since = time.time() prec_losses.update(loss, 1) log_msg = ('\n[epoch:{epoch}][iter:({batch}/{size})]' + '[lr:{lr}] loss: {loss:.4f}| eta: ' + '(data:{dt:.3f}s),(batch:{bt:.3f}s),(total:{tt:})') \ .format( epoch=epoch + 1, batch=index + 1, size=len(mytrainloader), lr=scheduler.get_lr()[0], loss=prec_losses.avg, dt=data_timer.val, bt=batch_timer.val, tt=bar.elapsed_td) print(log_msg) except Exception as e: print(e) continue index += 1 bar.next() bar.finish() pklword = args.train_dir.split('/')[-1] newpkl = 'parameter_%02d.pkl' % (epoch + 1) path = args.train_dir.replace(pklword, newpkl) is_best = trainloss < min_loss if is_best: min_loss = trainloss save_checkpoint({'epoch': epoch, 'model_state_dict': mymodel.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': trainloss, 'scheduer': scheduler }, is_best, path)
def fuzz_websockets(ws_address, init_messages, original_messages,
                    session_active_message, ignore_tokens, ignore_errors,
                    output, http_proxy_host, http_proxy_port):
    """
    Creates a websocket connection, sends the payloads, writes output to disk.

    :param ws_address: The websocket address to connect and send messages to

    :param init_messages: The login messages to send before any payloads.
                          This parameter accepts a serialized message (a string)
                          or a function that will generate the string when called.

    :param session_active_message: Wait for this message after sending the
                                   init_messages. Usually this is the message
                                   that says: "Login successful". Use None if
                                   there are no messages to wait for.

    :param original_messages: The original messages to be fuzzed.
                              This parameter accepts a serialized message (a string)
                              or a function that will generate the string when called.

    :param ignore_tokens: When generating messages with payloads, do not replace
                          these parts of the message. In general you want to set
                          this list to all the keys in the json objects. For
                          example, if the json object looks like {"foo": "bar"}
                          and you only want to fuzz the "bar" part of the
                          message, set ignore_tokens to ["foo"]

    :param ignore_errors: Ignore these errors when they are returned by the application

    :param output: Save all messages here

    :param http_proxy_host: The HTTP host (None if proxy shouldn't be used)
    :param http_proxy_port: The HTTP proxy (None if proxy shouldn't be used)

    :return: None
    """
    logging.info('Starting the fuzzing process...')

    # file() was the Python 2 builtin; open() works in both Python 2 and 3
    payload_count = len(open(PAYLOADS).readlines())

    with ThreadPoolExecutorWithQueueSizeLimit(max_workers=25) as ex:
        for original_message in original_messages:
            # TODO: Not sure if this is the best place to call the original_message
            #       function, but I need to get the message string to be able to
            #       tokenize it and fuzz it...
            original_message = serialize_message(original_message)
            logging.info('Fuzzing message: %s' % original_message)

            tokenized_messages = create_tokenized_messages(original_message,
                                                           ignore_tokens)

            bar = Bar('Processing', max=len(tokenized_messages) * payload_count)

            for tokenized_count, tokenized_message in enumerate(tokenized_messages):
                for payload in open(PAYLOADS):
                    bar.next()

                    # You might want to modify this if the message is not JSON
                    modified_message = replace_token_in_json(payload,
                                                             tokenized_message)
                    logging.debug('Generated fuzzed message: %s' % modified_message)

                    messages_to_send = init_messages[:]
                    messages_to_send.append(modified_message)

                    ex.submit(send_payloads_in_websocket, ws_address,
                              messages_to_send, session_active_message,
                              ignore_errors, tokenized_count, output,
                              http_proxy_host, http_proxy_port)

            bar.finish()

    logging.debug('Finished fuzzing process')
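# The fuzzer above leans on two helpers that are not shown here:
# create_tokenized_messages() and replace_token_in_json(). A minimal sketch of
# the idea, under the assumption that messages are flat JSON objects and that
# a "tokenized message" marks exactly one fuzzable value with a placeholder;
# the real helpers in the project may differ.
import json

TOKEN = "__FUZZ__"  # placeholder marking the value to replace (an assumption)

def create_tokenized_messages(original_message, ignore_tokens):
    """Yield one copy of the message per fuzzable value, each copy having one
    value replaced by TOKEN. Keys or values listed in ignore_tokens are kept."""
    obj = json.loads(original_message)
    tokenized = []
    for key in obj:
        if key in ignore_tokens or obj[key] in ignore_tokens:
            continue
        copy = dict(obj)
        copy[key] = TOKEN
        tokenized.append(json.dumps(copy))
    return tokenized

def replace_token_in_json(payload, tokenized_message):
    """Substitute the payload into the placeholder position. The payload is
    inserted verbatim; JSON escaping is left out of this sketch."""
    return tokenized_message.replace(TOKEN, payload.strip())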
def verify_producer_performance(with_dr_cb=True): """ Time how long it takes to produce and delivery X messages """ conf = { 'bootstrap.servers': bootstrap_servers, 'linger.ms': 500, 'error_cb': error_cb } p = confluent_kafka.Producer(conf) msgcnt = 1000000 msgsize = 100 msg_pattern = 'test.py performance' msg_payload = (msg_pattern * int(msgsize / len(msg_pattern)))[0:msgsize] dr = MyTestDr(silent=True) t_produce_start = time.time() msgs_produced = 0 msgs_backpressure = 0 print('# producing %d messages to topic %s' % (msgcnt, topic)) if with_progress: bar = Bar('Producing', max=msgcnt) else: bar = None for i in range(0, msgcnt): while True: try: if with_dr_cb: p.produce(topic, value=msg_payload, callback=dr.delivery) else: p.produce(topic, value=msg_payload) break except BufferError: # Local queue is full (slow broker connection?) msgs_backpressure += 1 if bar is not None and (msgs_backpressure % 1000) == 0: bar.next(n=0) p.poll(100) continue if bar is not None and (msgs_produced % 5000) == 0: bar.next(n=5000) msgs_produced += 1 p.poll(0) t_produce_spent = time.time() - t_produce_start bytecnt = msgs_produced * msgsize if bar is not None: bar.finish() print('# producing %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' % (msgs_produced, bytecnt / (1024 * 1024), t_produce_spent, msgs_produced / t_produce_spent, (bytecnt / t_produce_spent) / (1024 * 1024))) print( '# %d temporary produce() failures due to backpressure (local queue full)' % msgs_backpressure) print('waiting for %d/%d deliveries' % (len(p), msgs_produced)) # Wait for deliveries p.flush() t_delivery_spent = time.time() - t_produce_start print('# producing %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' % (msgs_produced, bytecnt / (1024 * 1024), t_produce_spent, msgs_produced / t_produce_spent, (bytecnt / t_produce_spent) / (1024 * 1024))) # Fake numbers if not using a dr_cb if not with_dr_cb: print('# not using dr_cb') dr.msgs_delivered = msgs_produced dr.bytes_delivered = bytecnt print( '# delivering %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' % (dr.msgs_delivered, dr.bytes_delivered / (1024 * 1024), t_delivery_spent, dr.msgs_delivered / t_delivery_spent, (dr.bytes_delivered / t_delivery_spent) / (1024 * 1024))) print('# post-produce delivery wait took %.3fs' % (t_delivery_spent - t_produce_spent))
def run_epoch(self, phase, epoch, data_loader): model_with_loss = self.model_with_loss if phase == 'train': model_with_loss.train() else: if len(self.opt.gpus) > 1: model_with_loss = self.model_with_loss.module model_with_loss.eval() torch.cuda.empty_cache() opt = self.opt results = {} data_time, batch_time = AverageMeter(), AverageMeter() avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) end = time.time() for iter_id, batch in enumerate(data_loader): if iter_id >= num_iters: break data_time.update(time.time() - end) for k in batch: if k != 'meta': batch[k] = batch[k].to(device=opt.device, non_blocking=True) output, loss, loss_stats = model_with_loss(batch) loss = loss.mean() if phase == 'train': self.optimizer.zero_grad() loss.backward() self.optimizer.step() batch_time.update(time.time() - end) end = time.time() Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( epoch, iter_id, num_iters, phase=phase, total=bar.elapsed_td, eta=bar.eta_td) for l in avg_loss_stats: avg_loss_stats[l].update(loss_stats[l].mean().item(), batch['input'].size(0)) Bar.suffix = Bar.suffix + '|{} {:.4f} '.format( l, avg_loss_stats[l].avg) if not opt.hide_data_time: Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) if opt.print_iter > 0: if iter_id % opt.print_iter == 0: print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) else: bar.next() if opt.test: self.save_result(output, batch, results) del output, loss, loss_stats, batch bar.finish() ret = {k: v.avg for k, v in avg_loss_stats.items()} ret['time'] = bar.elapsed_td.total_seconds() / 60. return ret, results
def eval(net, test_loader, device): ''' Parameter: model: model after loading weights device: gpu or cpu? ''' net.eval().to(device) bar = Bar('Processing validate', max=len(test_loader)) loss_ = AverageMeter() acc_ = AverageMeter() se_ = AverageMeter() sp_ = AverageMeter() auc_ = AverageMeter() f1_ = AverageMeter() draw_curve = True preds_prob_list = [] preds_list = [] gts_list = [] with torch.no_grad(): for i, (data, label) in enumerate(test_loader): data = data.to(device) label = label.to(device) pred_prob, d1_probs, d2_probs, d3_probs, d4_probs = net( data) # after sigmoid function validate_loss = calc_loss(pred_prob, label, bce_weight=.5) # validate_loss += calc_loss(d2_probs, label, bce_weight=.5) # validate_loss += calc_loss(d3_probs, label, bce_weight=.5) # validate_loss += calc_loss(d4_probs, label, bce_weight=.5) loss_.update(validate_loss.item(), data.size(0)) preds = torch.gt(pred_prob, .5).float() # Convert to numpy format preds = preds.cpu().data.numpy()[:, 0] label = label.cpu().data.numpy()[:, 0] pred_prob = pred_prob.cpu().data.numpy()[:, 0] pred_prob = pred_prob.reshape([-1]) gt = label.reshape([-1]) preds = preds.reshape([-1]) CM = confusion_matrix(preds, gt) F1, Acc, Se, Sp, _ = calculate_Accuracy(CM) Auc = roc_auc_score(gt, pred_prob) if draw_curve: preds_prob_list.append(pred_prob) preds_list.append(preds) gts_list.append(gt) acc_.update(Acc, data.size(0)) se_.update(Se, data.size(0)) sp_.update(Sp, data.size(0)) auc_.update(Auc, data.size(0)) f1_.update(F1, data.size(0)) bar.suffix = '{batch}/{size}) | Loss: {loss:.3f}'.format( batch=i + 1, size=len(test_loader), loss=loss_.avg) bar.next() bar.finish() print('Acc: %s | F1: %s | Se: %s | Sp: %s | Auc: %s' % (str( acc_.avg), str(f1_.avg), str(se_.avg), str(sp_.avg), str(auc_.avg))) if draw_curve: # https://github.com/RanSuLab/DUNet-retinal-vessel-detection _preds_prob = np.asarray(preds_prob_list).reshape(-1) _preds = np.asarray(preds_list).reshape(-1) _gts = np.asarray(gts_list).reshape(-1) # Area under the ROC curve fpr, tpr, thresholds = roc_curve(_gts, _preds_prob) auc_roc = roc_auc_score(_gts, _preds_prob) plt.figure() # plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') plt.plot(fpr, tpr, 'darkorange', label='(AUC = %0.4f)' % auc_roc) # plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.0]) plt.title('ROC Curve', fontsize=14) plt.xlabel("FPR (False Positive Rate)", fontsize=14) plt.ylabel("TPR (True Positive Rate)", fontsize=14) plt.legend(loc="lower right") plt.xticks(fontsize=14) plt.yticks(fontsize=14) # skplt.metrics.plot_roc(_gts, _preds_prob) plt.savefig("ROC.png") # Precision-recall curve precision, recall, thresholds = precision_recall_curve( _gts, _preds_prob) precision = np.fliplr([ precision ])[0] # so the array is increasing (you won't get negative AUC) recall = np.fliplr([ recall ])[0] # so the array is increasing (you won't get negative AUC) auc_prec_rec = np.trapz(precision, recall) plt.figure() plt.plot(recall, precision, 'darkorange', label='Area Under the Curve (AUC = %0.4f)' % auc_prec_rec) plt.title('Precision - Recall curve', fontsize=14) plt.xlabel("Recall", fontsize=14) plt.ylabel("Precision", fontsize=14) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.0]) plt.legend(loc="lower right") plt.xticks(fontsize=14) plt.yticks(fontsize=14) plt.savefig("Precision_Recall.png") # according to f1 score return f1_.avg, loss_.avg