Exemple #1
0
def torcURL(address, filename):
	"""Fetch *address* through the local Tor SOCKS5 proxy and save the body.

	The response body is written to *filename* (binary) and returned as
	bytes.  On a cURL failure a descriptive error string is returned
	instead; KeyboardInterrupt propagates to the caller.
	"""
	print('cURL on ' + address + ' to ' + filename + '\n')
	# Configure a single curl handle (the original rebuilt it 100 times
	# inside a cosmetic progress loop, only the last instance was used).
	output = io.BytesIO()
	curl = pycurl.Curl()
	curl.setopt(pycurl.URL, address)
	curl.setopt(pycurl.PROXY, '127.0.0.1')
	curl.setopt(pycurl.PROXYPORT, SOCKS_PORT)
	curl.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME)
	curl.setopt(pycurl.WRITEFUNCTION, output.write)
	try:
		curl.perform()
		data = output.getvalue()
		# Persist the payload before returning it (the original returned
		# early, so the file write below it was unreachable).
		with open(filename, 'wb') as fp:
			fp.write(data)
		return data
	except KeyboardInterrupt:
		raise
	except pycurl.error as e:
		return "Unable to reach %s (%s)" % (address, e)
	except Exception:
		UnknownError()
def evaluate(train_file_path, test_num, tagger, output_file_path):
    """Train *tagger* on all but the last *test_num* sentences, tag the
    held-out sentences, write them tab-separated to *output_file_path*,
    and return the token-level accuracy as a percentage.

    NOTE(review): the final parsed sentence is excluded from both splits
    (both slices stop at len(sents)-1), and the sentence at index
    `test_start` appears in BOTH train_data and test_data -- possible
    off-by-one; confirm intent.
    """
    sents = parse_train_data(train_file_path)
    # Hold out the last `test_num` sentences for testing.
    test_start = len(sents) - test_num - 1
    test_data = sents[test_start:len(sents)-1]
    train_data = sents[0:test_start+1]
    print 'Training with {0} sentences'.format(len(train_data))
    tagger.train(train_data)
    output = open(output_file_path, 'w')
    correct = 0  # correctly tagged tokens
    total = 0    # all tagged tokens
    bar = Bar('Testing with {0} sentences'.format(len(test_data)), max=len(test_data))
    for s in test_data:
        # Strip the gold tags and re-tag with the freshly trained tagger.
        tagged = tagger.tag(remove_tags(s))
        # evaluate against the gold-standard sentence
        correct += evaluate_sentence(s, tagged)
        total += len(tagged)
        # write the sentence as tab-separated word_TAG tokens
        words = []
        for t in tagged:
            words.append(t[0] + '_' + t[1])
        output.write('\t'.join(words) + '\n')
        bar.next()
    bar.finish()
    output.close()
    return correct / float(total) * 100
def pipeline_pos(titles, descriptions, tags):
    """POS-tag the textual context of every image (title, each description
    sentence, and tags) and return one flat list of (word, tag) pairs per
    image."""
    def preprocess(inpt):
        # Placeholder hook (stemming/stopword removal could go here).
        return inpt

    # Create feature vectors of context and only keep images WITH context
    bar = Bar('Extracting features...', max=len(titles))
    pos_collection = []
    for idx in xrange(len(titles)):
        # Collect every non-empty chunk of context for this image.
        context = []
        title_tokens = preprocess(titles[idx].split(' '))
        if title_tokens:
            context.append(title_tokens)
        # One chunk per description sentence.
        for sentence in sent_tokenize(descriptions[idx]):
            sentence_tokens = preprocess(sentence.split(' '))
            if sentence_tokens:
                context.append(sentence_tokens)
        # And the tags themselves.
        tag_tokens = preprocess(tags[idx])
        if tag_tokens:
            context.append(tag_tokens)

        # Tag all chunks at once, then flatten into a single list.
        tagged_chunks = nltk.pos_tag_sents(context)
        pos_collection.append(list(itertools.chain(*tagged_chunks)))
        bar.next()
    bar.finish()

    return pos_collection
Exemple #4
0
def draw_poster(poster_text, textsize, inp):
    '''split out and highlight the words'''
    top_pad = 0.25
    left_pad = 9
    font = ImageFont.truetype("NotCourierSans.otf", textsize) #This font needs to be monopaced!
    im = Image.new("RGBA", (9933, 14043), "black") #A1 Size
    draw = ImageDraw.Draw(im) #Set up sheet to draw on

    print('Drawing text')
    bar = Bar('Processing', max=len(poster_text)) #Progress bar to entertain me while I watch this run

    quote = "1969-07-21 02:56:48 CDR (TRANQ) That's one small step for man, one giant leap for mankind."
    white = (255,255,255,255)
    red = (255,0,0,255)

    for line_no, line in enumerate(poster_text):
        y = int((line_no + top_pad) * textsize)
        if "1969-07-21 02:56:48 CDR" in line:
            # Split around the famous quote and render it in red,
            # keeping the surrounding text white.
            parts = line.split(quote)
            width_before, _ = draw.textsize(parts[0], font=font)
            width_quote, _ = draw.textsize(quote, font=font)
            draw.text((left_pad, y), parts[0], font=font, fill=white) #All text padded 4 pixels left
            draw.text((left_pad + width_before, y), quote, font=font, fill=red)
            draw.text((left_pad + width_before + width_quote, y), parts[1], font=font, fill=white)
        else:
            draw.text((left_pad, y), line, font=font, fill=white)
        bar.next()
    bar.finish()

    print('Saving image!')
    if inp == 'y':
        # Paste the artwork centred inside a larger bleed canvas for print.
        bleedx, bleedy = 10004, 14114
        bufferx, buffery = int((bleedx - 9933) / 2), int((bleedy - 14043) / 2)
        bleed_im = Image.new("RGBA", (bleedx, bleedy), "black") #Bleed area for printing
        bleed_im.paste(im, (bufferx, buffery))
        bleed_im.save("output.png", "PNG")
    else:
        im.save("output.png", "PNG")
def pipeline_onehot(titles, descriptions, tags):
    """Build bag-of-words index features for each image's context.

    Title, description and tags are merged into one document per image,
    vectorized with CountVectorizer(min_df=5), and each document becomes
    a fixed-width (500) row of 1-based vocabulary indices (0 = padding).

    Returns:
        (feat_flatten, vectorizer): a scipy CSR matrix with one row per
        document, and the fitted CountVectorizer.
    """
    # Merge title, description and tags into a single text document per image.
    # (The original also created a progress Bar here that was never advanced
    # or finished -- removed.)
    docs = []
    for i in range(len(titles)):
        docs.append(u'{} {} {}'.format(titles[i], descriptions[i], ' '.join(tags[i])))

    vectorizer = CountVectorizer(min_df=5)
    X = vectorizer.fit_transform(docs)

    bar = Bar('Extracting features...', max=len(docs))
    idx_docs = []
    for idoc, doc in enumerate(docs):
        # Non-zero vocabulary indices, shifted by 1 so 0 can act as padding.
        idxs = (X[idoc].nonzero()[1] + 1).tolist()
        idx_docs.append(idxs)
        bar.next()
    bar.finish()

    max_len = 500  # every document is truncated/padded to this many indices

    bar = Bar('Merging into one matrix...', max=len(idx_docs))
    for i, idx_doc in enumerate(idx_docs):
        features = np.zeros((1, max_len), np.int64)
        vec = np.array(idx_doc[:max_len])
        features[0, :vec.shape[0]] = vec

        if i == 0:
            feat_flatten = csr_matrix(features.flatten())
        else:
            feat_flatten = vstack([feat_flatten, csr_matrix(features.flatten())])
        bar.next()
    bar.finish()

    return feat_flatten, vectorizer
Exemple #6
0
def saveAverageImage(kitti_base, pos_labels, shape, fname, avg_num=None):
    """Average up to *avg_num* edge-detected, resized positive samples and
    write the result to *fname*.

    Args:
        kitti_base: base directory of the KITTI dataset.
        pos_labels: labels of the positive samples to average.
        shape: numpy shape of the output image.
        fname: path of the image file to write.
        avg_num: maximum number of samples to average; defaults to all.
    """
    num_images = float(len(pos_labels))
    # Resolve the None default BEFORE clamping: the original called
    # min(avg_num, num_images) first, which is a TypeError on Python 3
    # when avg_num is None.  (Sibling average_image() has the right order.)
    if avg_num is None:
        avg_num = num_images
    else:
        avg_num = min(avg_num, num_images)

    # avg_img = np.zeros((shape[0],shape[1],3), np.float32)
    avg_img = np.zeros(shape, np.float32)
    progressbar = ProgressBar('Averaging ' + fname, max=len(pos_labels))
    num = 0
    for label in pos_labels:
        if num >= avg_num:
            break
        num += 1
        progressbar.next()
        sample = getCroppedSampleFromLabel(kitti_base, label)
        # sample = np.float32(sample)

        resized = resizeSample(sample, shape, label)

        # Edge-detect, then accumulate this sample's equal share of the mean.
        resized = auto_canny(resized)
        resized = np.float32(resized)

        avg_img = cv2.add(avg_img, resized / float(avg_num))
    progressbar.finish()

    cv2.imwrite(fname, avg_img)
def get_list(filename):
	"""
	Creates an array of objects out of 
	input training file
	==================================
	Returns:
		* array of objects where each
		object corresponds to a document
	==================================
	"""

	# Read every document up front; one tab-separated line per document.
	# (with-statement closes the handle; the original leaked it on error.)
	with open(filename) as fo:
		lines = fo.readlines()
	total = len(lines)
	obj_arr = []
	bar = Bar("Processing", max=total, suffix='%(percent)d%% | %(index)d of %(max)d | %(eta)d seconds remaining.')
	for each in lines:
		# Strip the trailing newline, then split the fields.
		send_obj = files(each.split('\n')[0].split('\t'))
		send_obj.set_word_count(5)
		send_obj.set_pos_features()
		send_obj.set_punctuation_features()
		send_obj.set_vectors()
		obj_arr.append(send_obj)
		bar.next()
	bar.finish()
	return obj_arr
def main():
    """Prompt for a PCAP file, resolve ownership info for every source IP
    in it, and print the collected rows as a grid table.

    NOTE(review): `get_data`, `table` and `exceptions` live elsewhere in
    the module; `get_data(ip)` presumably appends a row to `table` and
    records failures in `exceptions` -- verify against those definitions.
    """
    infile = raw_input('Input file name: ')
    if os.path.exists(infile):
        print '\n[!] Loading PCAP file. Please wait, it might take a while...'
        # Unique, sorted source IPs of every IP packet in the capture.
        ips = sorted(set(p[IP].src for p in PcapReader(infile) if IP in p))

        total = len(ips)
        print '[!] Total number of IP addresses: %d\n' % total

        bar = Bar('Processing', max=total)
        for ip in ips:
            get_data(ip)
            bar.next()
        bar.finish()

        headers = ['IP', 'OWNER','COUNTRY', 'ORGANIZATION','SERVER','DESCRIPTION']
        print '\n\n'
        print tabulate(table,headers,tablefmt='grid')
        if exceptions:
            print '\nExceptions:'
            for e in exceptions:
                print '*\t%s' % e
            print '\n\n[!] Done.\n\n'
    else:
        print '[!] Cannot find file "%s"\n\tExiting...' % infile
        sys.exit()
Exemple #9
0
def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    This function reads a file with tweet IDs and then loads them
    through the API into the database. Prepare to wait quite a bit,
    depending on the size of the dataset.
    """
    ids_to_fetch = set()
    # Close the ID file deterministically (the original leaked the handle).
    with open(idlist_file, "r") as id_file:
        for line in id_file:
            # Remove newline character through .strip()
            # Convert to int since that's what the database uses
            ids_to_fetch.add(int(line.strip()))
    # Find a list of Tweets that we already have
    ids_in_db = set(t.id for t in database.Tweet.select(database.Tweet.id))
    # Sets have an efficient .difference() method that returns IDs only present
    # in the first set, but not in the second.
    ids_to_fetch = ids_to_fetch.difference(ids_in_db)
    logging.warning(
        "\nLoaded a list of {0} tweet IDs to hydrate".format(len(ids_to_fetch)))

    # Set up a progressbar; fetching is paged, so advance by page size.
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
def gradient_descent(X, Y, iter, alpha):
    """Batch gradient descent for a least-squares linear model.

    Args:
        X: design matrix (rows x cols).
        Y: target column vector.
        iter: maximum number of iterations.  NOTE: shadows builtin `iter`.
        alpha: learning rate (scaled by 1/rows each step).

    Returns:
        weight column vector of length X.shape[1]; returns early once the
        absolute weight change per step drops below 1e-6.
    """
    (rows, cols) = X.shape
    Xt = X.T
    w = numpy.zeros((len(Xt), 1))
    print w.shape
    bar = Bar('iterations', max=iter)
    for i in range(0, iter):
        pw = w  # previous weights (reference is safe: w is rebound below)
        # NOTE(review): gradient of ||Xw - Y||^2 would be 2*Xt*X*w - 2*Xt*Y;
        # the second term here has no factor 2 -- confirm this is intended.
        dw =  2*matrix.dot(matrix.dot(Xt,X), w) - matrix.dot(Xt, Y)

        # if (True):
        #     # print "alpha " + str(alpha)
        #     # print "E is " + str(dw.T.dot(dw).sum())
        #     # print dw
        #     print w
        w = w - alpha*dw/rows
        diff =numpy.absolute(w-pw).sum()
        print "Diff is %f " % diff
        # Converged: stop early.
        if (diff < 0.000001):
            bar.finish()
            return w

        # raw_input()
        bar.next()
    bar.finish()
    return w
def main(args):
	"""Build a caption-references file for evaluation.

	Reads a COCO-style annotation JSON (args.c), maps image file names to
	ids, groups tokenized lower-cased captions per image id, then writes
	one '<>'-joined line of captions per image listed in args.s to
	args.saveto.
	"""
	d = json.load(open(args.c, 'r'))

	np.random.seed(1234)

	im2id  = {}   # image file name -> image id
	id2cap = {}   # image id -> list of normalized captions

	print 'img 2 id....'
	for im in d['images']:
		im2id[im['file_name']] = im['id']

	bar = Bar('id 2 cap...', max=len(d['annotations']))
	for ann in d['annotations']:
		# Tokenize and lower-case each caption for evaluation.
		cap = nltk.word_tokenize(ann['caption'])
		cap = ' '.join(cap).lower()
		if ann['image_id'] in id2cap:
			id2cap[ann['image_id']].append(cap)
		else:
			id2cap[ann['image_id']] = [cap]
		bar.next()
	bar.finish()

	# Whitespace-separated list of image file names to emit.
	with open(args.s, 'r') as f:
		images = f.read().split()

	refs = []
	for im in images:
		refs.append('<>'.join(id2cap[im2id[im]]))

	with open(args.saveto, 'w') as f:
		print >>f, '\n'.join(refs)
class Closest(object):
    # Class-level defaults; every instance overwrites these in __init__.
    data = pd.DataFrame()
    cols = []
    bar = None

    def __init__(self, df, cols, size):
        """Remember the data frame and feature columns; set up a progress bar."""
        self.data = df
        self.cols = cols
        self.bar = Bar(message="Compressing Time", max=size,
                       suffix="%(percent)d%% (%(index)d/%(max)d) ETA %(eta_td)s")
        return

    def __call__(self, row):
        """Fill *row*'s feature columns with the sum over all rows of the
        same restaurant dated at or before *row* (falling back to that
        restaurant's column means when no such rows exist)."""
        self.bar.next()
        same_restaurant = self.data.restaurant_id == row.restaurant_id
        not_after = self.data.date <= row.date
        found = self.data[same_restaurant & not_after]
        if found.shape[0] == 0:
            # FIXME Do something smarter than averaging?
            found = self.data[same_restaurant][self.cols].mean()
        else:
            found = found[self.cols].sum()
        # FIXME Sometimes NaNs appear if I am missing the restaurant ID.  What to do?
        found.fillna(0, inplace=True)
        row[self.cols] = found
        return row

    def __del__(self):
        self.bar.finish()
Exemple #13
0
def read_and_gen(lyric_path, file_path):
    """
    read file and generate mp3 sound file
    :param lyric_path: path of the utf-8 lyric text file to read
    :param file_path: path of the mp3 file to (re)generate
    :return:
    """

    # remove original before adding new content in it
    if os.path.exists(file_path):
        os.remove(file_path)

    # Renamed from `file` (shadowed the builtin and its own handle).
    with open(lyric_path, encoding="utf-8") as fh:
        lines = fh.readlines()
    bar = Bar('Processing', max=len(lines))
    for line in lines:
        # Dispatch on the first character: Latin letter -> English TTS,
        # Chinese character -> Mandarin TTS; other lines are skipped.
        if is_alphabet(line[0]):
            speak = gtts_extends(line, lang='en')
            speak.sequence_save(file_path)

        if is_chinese(line[0]):
            speak = gtts_extends(line, lang='zh')
            speak.sequence_save(file_path)
        bar.next()
    bar.finish()
    print("transform success!")
def tokenize_proteins(data, msg='Processing proteins'):
    """Distribute all poses into either decoys list or actives OrderedDict.
    Poses placed into the actives OrderedDict are further organized into
    sublists for each ligand.

    args:
        @data list of string lines containing pose data
        @msg string message to display in progress bar
    returns:
        @actives OrderedDict of all active poses gathered from data
        @decoys list of all decoy poses gathered from data
    """

    actives = OrderedDict()
    decoys  = list()
    bar = Bar(msg, max=len(data))

    for i, line in enumerate(data):
        bar.next()
        pose = posedict(line)  # Token -> List
        if pose['label'] == 1:  # Pose -> Actives (the original comments here were swapped)
            # Unique id: ligand name plus the line index.
            pose['id'] = pose['ligand'] + '-' + str(i)
            actives.setdefault(pose['ligand'], []).append(pose)
        else:  # Pose -> Decoys
            decoys.append(pose)
    bar.finish()
    print ""

    return actives, decoys
Exemple #15
0
def average_image(pos_region_generator, shape, avg_num=None):
    """Average up to *avg_num* edge-detected positive regions (cropped and
    resized to *shape*) and return the accumulated float32 image."""
    pos_regions = list(pos_region_generator)

    region_count = float(len(pos_regions))
    if avg_num is None:
        avg_num = region_count
    else:
        avg_num = min(avg_num, region_count)

    # Regions are loaded as (width, height).
    window_dims = (shape[1], shape[0])

    avg_img = np.zeros(shape, np.float32)
    progressbar = ProgressBar('Averaging ', max=avg_num)
    processed = 0
    for region in pos_regions:
        if processed >= avg_num:
            break
        processed += 1
        progressbar.next()

        # Crop/resize, edge-detect, then accumulate this sample's
        # equal share of the mean.
        sample = region.load_cropped_resized_sample(window_dims)
        edges = np.float32(auto_canny(sample))
        avg_img = cv2.add(avg_img, edges / float(avg_num))
    progressbar.finish()

    return avg_img
Exemple #16
0
def main(argv):
  """Print a TSV report of keyword statistics (impressions, clicks, CTR,
  position, weekly sessions) for the top landing pages of every website
  of the given country, combining Analytics and Search Console data.

  NOTE(review): bar.finish() is called inside the website loop although
  the bar was sized for args.pages -- confirm the intended bar lifetime.
  """
  args = argparser.parse_args()

  print >> sys.stderr, '# Start: Keyword Data: %s, %s, %s, %s' % (args.cc, args.week, args.pages, datetime.datetime.now().time().isoformat())

  ga, gsc = initialize_service(argv, "analytics"), initialize_service(argv, "webmasters")

  print '"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"' % ("cc", "website", "url", "date", "keyword", "impressions", "clicks", "ctr", "position", "sessions (week)")
  
  bar = Bar('Processing', max=args.pages, suffix ='%(percent).1f%% - %(eta)ds')
  
  for website in GA_IDS[args.cc]:

    urls = get_top_landing_pages(ga, args.cc, website, args.week, args.pages)
    for row in urls:

      data = []

      # we switched from http to https between week 3 and 4
      if (args.week <= 4 and args.cc != 'VN') or website != "IPRICE":
        data.extend(get_keyword_data(gsc, args.cc, website, args.week, row[0][1:], "http"))
      if (args.week >=3 or args.cc == 'VN') and website == "IPRICE":
        data.extend(get_keyword_data(gsc, args.cc, website, args.week, row[0][1:], "https"))

      output(args.cc, website, row[0], row[1], data)

      bar.next()
    bar.finish()
      
  print >> sys.stderr, '# End: Keyword Data: %s, %s, %s, %s' % (args.cc, args.week, args.pages, datetime.datetime.now().time().isoformat())
 def set_image_objects(self):
     """Query the Landsat archive (one query per sensor family) for scenes
     that intersect this object's bounding box.

     Returns:
         list of feature dicts accumulated across the four queries.
     """
     landsat8 = "(acquisitionDate >= date'2013-01-01' AND acquisitionDate <= date'2016-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'OLI') AND (cloudCover <= 20)"
     landsat7 = "(acquisitionDate >= date'2003-01-01' AND acquisitionDate <= date'2016-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'ETM_SLC_OFF') AND (cloudCover <= 20)"
     landsat4_5 = "(acquisitionDate >= date'1982-01-01' AND acquisitionDate <= date'2011-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'TM') AND (cloudCover <= 20)"
     landsat1_5 = "(acquisitionDate >= date'1972-01-01' AND acquisitionDate <= date'2013-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'MSS') AND (cloudCover <= 20)"
     queries_name = ["landsat8","landsat7","landsat4_5","landsat1_5"]
     queries = [landsat8,landsat7,landsat4_5,landsat1_5]
     obj = []
     for count, q in enumerate(queries):
         parms = {
             "f":"json",
             "where":q,
             "geometry":self.bounding_box["geometry"],
             "returnGeometry":"false",
             "spatialRel":"esriSpatialRelIntersects",
             "geometryType":"esriGeometryEnvelope",
             "inSR":self.bounding_box["geometry"]["spatialReference"]["wkid"],
             "outSR":self.bounding_box["geometry"]["spatialReference"]["wkid"],
             "outFields":"*",
             "orderByFields":"dayOfYear"
         }
         query = self._query(parms)
         # Size the bar by the features we actually iterate (the original
         # used len(queries), which made the progress meaningless).
         bar = Bar("Requesting data: "+queries_name[count], max=len(query["features"]))
         for i in query["features"]:
             obj.append(i)
             bar.next()
         bar.finish()
     return obj
Exemple #18
0
def main(argv):
	"""Count how many masterbrain strings contain at least one keyword as
	a whole word, printing `1 <string> <keyword>` per match and
	`0 <string>` otherwise, followed by the total match ratio."""
	args = argparser.parse_args()

	print >> sys.stderr, '# Start: Matching: %s' % (datetime.datetime.now().time().isoformat())

	masterbrain = read(args.masterbrain)
	keywords = read(args.keywords)

	bar = Bar('Processing', max=len(masterbrain), suffix ='%(percent).1f%% - %(eta)ds')

	# Pre-compile one whole-word regex per keyword.
	regex = {}
	for keyword in keywords:
		regex[keyword] = re.compile(r'\b({0})\b'.format(keyword))

	matches = 0
	for string in masterbrain:
		for keyword in keywords:
			if regex[keyword].search(string):
				matches = matches + 1
				print 1, "\t", string, "\t", keyword
				break
		else:
			# for/else: runs only when no keyword matched this string.
			print 0, "\t", string
		bar.next()

	bar.finish()

	print matches, "/", len(masterbrain)

	print >> sys.stderr, '# End: Matching: %s' % (datetime.datetime.now().time().isoformat())
Exemple #19
0
def keyadd(name):
    """Upload ~/.ssh/id_rsa.pub to nova as keypair *name*; on failure,
    delete any existing keypair of that name and retry once, then print
    the resulting keypair list.

    NOTE(review): the bare `except:` clauses swallow every error, and
    bar.next() can be called more than max=5 times on the retry path.
    """
    bar = Bar('Processing', max=5)
    try:
        bar.next()
        nova('keypair-add', '--pub-key', '~/.ssh/id_rsa.pub', '%s'
             % name)
    except:

        # print "Key add error on %s" % name

        bar.next()
        try:
            bar.next()

            # print "Tryig to delete key"

            result = nova('keypair-delete', '%s' % name)

            # print result
            # print "Tryig to add key"

            bar.next()
            results = nova('keypair-add', '--pub-key',
                           '~/.ssh/id_rsa.pub', '%s' % name)
        except:

            # print result

            print '''
Key deletion error on %s
''' % name
    bar.next()
    bar.finish()
    result = nova('keypair-list')
    print result
def hough(im, ntx=460, mry=360):
    """Hough line transform of greyscale PIL image *im*.

    Every non-background pixel (value != 255) votes for all (theta, r)
    bins it can lie on; votes darken the accumulator image.

    Args:
        im: input 'L'-mode PIL image, 255 = background.
        ntx: number of theta bins (output width).
        mry: number of r bins (output height; rounded down to even).
    Returns:
        new ntx x mry PIL image; darker pixels mean stronger lines.
    """

    pim = im.load()
    nimx, mimy = im.size
    mry = int(mry/2)*2  # force even so r-bins can be centred at mry/2
    him = Image.new("L", (ntx, mry), 255)
    phim = him.load()

    # Bin widths: dr per radius bin, dth per angle bin.
    rmax = hypot(nimx, mimy)
    dr = rmax / (mry/2)
    dth = pi / ntx
    bar = Bar('Processing', max=nimx)
    for jx in xrange(nimx):
        for iy in xrange(mimy):
            col = pim[jx, iy]
            if col == 255: continue  # skip background pixels
            for jtx in xrange(ntx):
                th = dth * jtx
                r = jx*cos(th) + iy*sin(th)
                iry = mry/2 + int(r/dr+0.5)
                try:
                    phim[jtx, iry] -= 1
                except:
                    # NOTE(review): bare except hides out-of-range bins and
                    # pixel underflow; consider bounds-checking instead.
                    print 'error'
        bar.next()
    del bar  # NOTE(review): bar.finish() is never called, only deleted
    return him
Exemple #21
0
def main(argv):
	"""Print AdWords keyword data as TSV: search volume, competition and
	CPC in batches of PAGE_SIZE (args.stats), or keyword ideas one
	request at a time (args.ideas)."""
	args = argparser.parse_args()

	print >> sys.stderr, '# Start: Adwords Data: %s, %s' % (args.cc, datetime.datetime.now().time().isoformat())

	service = initialize_service()
	keywords = read_file(args.file)

	print '"%s"\t"%s"\t"%s"\t"%s"' % ("keyword", "sv (month)", "competition", "cpc ($)")

	bar = Bar('Processing', max=len(keywords), suffix ='%(percent).1f%% - %(eta)ds')
	if args.stats:
		# pagination of 800 items
		kws = keywords
		while len(kws) > 0:
			page = kws[0:PAGE_SIZE]
			kws = kws[PAGE_SIZE:]

			output(query_adwords(service, args.cc, page, "STATS"))

			bar.next(len(page))

	elif args.ideas:
		# pagination of 1 item
		# NOTE(review): `kw` is never passed to get_keyword_suggestions,
		# so every iteration issues the same request -- verify against
		# that function's signature.
		for kw in keywords:
			output(get_keyword_suggestions(service, args.cc, "IDEAS"))

			bar.next()

	bar.finish()
	
	print >> sys.stderr, '# End: Adwords Data: %s, %s' % (args.cc, datetime.datetime.now().time().isoformat())
Exemple #22
0
 def parse(self, dataset):
     """
     Tokenize and dependency-parse every part of *dataset* with spaCy,
     attaching per-token features and dependency links, then run the
     part-level post-processing steps.

     :type dataset: nala.structures.data.Dataset
     """
     outer_bar = Bar('Processing [SpaCy]', max=len(list(dataset.parts())))
     for part in dataset.parts():
         sentences = part.get_sentence_string_array()
         for index, sentence in enumerate(sentences):
             doc = self.nlp(sentence)
             for token in doc:
                 # Mirror spaCy's token attributes onto our own token.
                 tok = part.sentences[index][token.i]
                 tok.features = {
                                 'id': token.i,
                                 'pos': token.tag_,
                                 'dep': token.dep_,
                                 'lemma': token.lemma_,
                                 'prob': token.prob,
                                 'is_punct': token.is_punct,
                                 'is_stop': token.is_stop,
                                 'cluster': token.cluster,
                                 'dependency_from': None,
                                 'dependency_to': [],
                                 'is_root': False,
                                }
                 part.tokens.append(tok)
             for tok in doc:
                 self._dependency_path(tok, index, part)
         part.percolate_tokens_to_entities()
         part.calculate_token_scores()
         part.set_head_tokens()
         outer_bar.next()
     outer_bar.finish()
     # Idiomatic truthiness test (was a `== True` comparison).
     if self.constituency_parser:
         self.parser.parse(dataset)
 def getUsers(hubname):
     """Download the subscriber user names of hub *hubname* into
     data/hubs/<hubname>, one name per line.

     Returns None (leaving existing data untouched) when the data file is
     already present; raises on a broken URL or a page parse failure.
     """
     log = open(HubAnalyzer.logfile, "a")
     print("hub: " + hubname + " ----------------- ", file=log)
     print(time.strftime("%H:%M:%S"), file=log)
     # clean the file to write users to
     url = HubAnalyzer.hubname2link(hubname)
     output_filename = "data/hubs/" + hubname
     # if data is here, do nothing
     if os.path.isfile(output_filename) and not HubAnalyzer.enforce_download_in_presence_of_data:
         print("data is already here, abort this url", file=log)
         return None
     output_file = open(output_filename, "w")
     try:
         last_page_num = int(HubAnalyzer.getLastPageNumber(url))
     except Exception as err:
         print("URL is broken, abort the url", file=log)
         log.flush()
         os.remove(output_filename)
         raise Exception("Cannot analyze the page, please, check the url below: \n" + url)
     # get connection to habrahabr-hub
     suffix = "/subscribers/rating/page"
     userlist_url = url + suffix
     http = urllib3.PoolManager()
     if HubAnalyzer.report_downloading_progress:
         HubAnalyzer.get_hub_description(hubname)
         bar = Bar("Downloading: " + hubname, max=last_page_num, suffix="%(percent)d%%")
     for i in range(1, last_page_num + 1):
         user_page = userlist_url + str(i)
         print(user_page, file=log)
         log.flush()
         try:
             response = http.request("GET", user_page)
         except urllib3.exceptions.HTTPError as err:
             # NOTE(review): urllib3's HTTPError exposes no `code`
             # attribute; this branch looks ported from urllib2 -- verify.
             if err.code == 404:
                 print(user_page + " !! 404 !!", file=log)
                 log.flush()
                 output_file.close()
                 os.remove(output_filename)
                 # Fix: the original `raise ("...")` raised a plain string,
                 # which itself raises TypeError on Python 3.
                 raise Exception("Hub is not found, please, check the url")
             else:
                 print(user_page + " PARSING ERROR ", file=log)
                 log.flush()
                 output_file.close()
                 os.remove(output_filename)
                 raise Exception("Error: cannot parse the page!")
         html = response.data
         soup = BeautifulSoup(html)
         usersRow = soup.find_all(class_="user ")
         for userRow in usersRow:
             username = userRow.find(class_="username").text
             print(username, file=output_file)
         output_file.flush()
         if HubAnalyzer.report_downloading_progress:
             bar.next()
     # finalize and close everything
     if HubAnalyzer.report_downloading_progress:
         bar.finish()
     output_file.close()
     log.close()
Exemple #24
0
def editorial_publish(guides,
                      endpoint,
                      function_class,
                      user_agent,
                      nailgun_bin,
                      content_generator):
    """
    takes care of publishing the editorial content for the guides.

    For every guide JSON file: resolve its city resource via the SPARQL
    endpoint, infer wikipedia/wikivoyage URLs, generate editorial content
    and insert it back into the guide file.

    Returns True when at least one guide failed, False otherwise.
    """

    # init the nailgun thing for ed content generation.
    nailguninit(nailgun_bin,content_generator)


    searches= {}

    pbar = Bar('extracting editorial content for guides:',max=len(guides)+1)
    pbar.start()

    error = False
    for i, guide in enumerate(guides):
        jsonguide = None
        with open(guide,'r') as g:
            jsonguide = json.load(g)

        if not jsonguide:
            # Fix: format() was called without the argument for its {0}
            # placeholder, which raised IndexError at runtime.
            logging.error('could not load json from {0}'.format(guide))
            error = True
            continue
        search = cityinfo.cityinfo(jsonguide)
        uri = cityres.cityres(search,endpoint)
        if not uri:
            logging.error(
                    'no dbpedia resource was found for {0}'.format(guide))
            error = True
            continue
        urls = urlinfer.urlinferdef([unquote(uri)])
        if len(urls) < 1:
            logging.error('no wikipedia/wikivoyage urls found/inferred'\
                   ' for resource {0}'.format(uri))
            error = True
            continue
        content = editorial_content(urls,function_class,user_agent)
        if not content:
            logging.error('no editorial content could be'\
                    ' generated for {0}'.format(guide))
            error = True
            continue

        #insert the content into the guide
        jsonsert.jsonsert(content, guide)

        logging.info('editorial content for {0} sucessfully'\
                ' inserted.'.format(guide))
        pbar.next()

    pbar.finish()
    return error
    def evolve(self, population, cxpb, mutpb, mutfq, ngen, goal):
        """Evolve *population* for up to *ngen* generations and return the
        fittest individual found.

        Args:
            population: list of DEAP individuals.
            cxpb: crossover probability per pair.
            mutpb: mutation probability per child.
            mutfq: mutation frequency, forwarded to self.mutate.
            ngen: maximum number of generations.
            goal: fitness threshold for early stopping.
        """

        # Cheapest classifier.
        clf = LinearRegression(normalize=True)

        # Evaluate fitnesses of starting population.
        # (list() keeps the map reusable and Python-3 safe.)
        fitness_list = list(map(lambda x: self.evaluate(x, clf), population))

        # Assign fitness values.
        for individual, fitness in zip(population, fitness_list):
            individual.fitness.values = fitness

        best = max(population, key=lambda x: x.fitness.values[0])

        # So that we know things are happening.
        bar = Bar('Evolving', max=ngen)

        # Evolution!
        for gen in range(ngen):

            if best.fitness.values[0] > goal:
                break

            # Select the next generation of individuals (elitism: keep best).
            offspring = []
            offspring.append(best)
            offspring += tools.selTournament(population, len(population)-1, 10)
            offspring = list(map(self.toolbox.clone, offspring))

            # Apply crossovers.
            for child_a, child_b in zip(offspring[::2], offspring[1::2]):  # Staggered.
                if random.random() < cxpb:
                    self.crossover(child_a, child_b, cxpb)
                    del child_a.fitness.values
                    del child_b.fitness.values

            # Apply mutations.
            for child in offspring:
                if random.random() < mutpb:
                    self.mutate(child, mutfq)
                    del child.fitness.values

            # Reevaluate fitness of changed individuals.
            # Fix: the original mapped evaluate over `population` here,
            # pairing stale fitness values with the new children.
            new_children = [e for e in offspring if not e.fitness.valid]
            fitness_list = list(map(lambda x: self.evaluate(x, clf), new_children))
            for individual, fitness in zip(new_children, fitness_list):
                individual.fitness.values = fitness

            # Replace old population with new generation.
            best = max(population, key=lambda x: x.fitness.values[0])
            population = offspring

            # Progress!
            bar.next()

        # Done! Return the most fit evolved individual.
        bar.finish()
        return best
def do_epoch(mode, epoch, skipped=0):
    """Run one epoch of the DMN in 'train' or 'test' mode.

    Steps through every batch, accumulating loss and predictions, prints
    the confusion matrix and accuracy, decays the learning rate when
    accuracy drops versus the previous epoch, and returns
    (avg_loss, skipped).

    NOTE(review): relies on module-level `dmn`, `args` and `accuracies`.
    """
    # mode is 'train' or 'test'
    y_true = []
    y_pred = []
    avg_loss = 0.0
    prev_time = time.time()

    batches_per_epoch = dmn.get_batches_per_epoch(mode)

    if mode=="test":
        # Cap test epochs at 1000 batches.
        batches_per_epoch=min(1000,batches_per_epoch)
    bar=Bar('processing',max=batches_per_epoch)
    for i in range(0, batches_per_epoch):
        step_data = dmn.step(i, mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        current_skip = (step_data["skipped"] if "skipped" in step_data else 0)
        log = step_data["log"]

        skipped += current_skip

        # Skipped batches do not contribute to loss or predictions.
        if current_skip == 0:
            avg_loss += current_loss

            for x in answers:
                y_true.append(x)

            for x in prediction.argmax(axis=1):
                y_pred.append(x)

            # TODO: save the state sometimes
            if (i % args.log_every == 0):
                cur_time = time.time()
                #print ("  %sing: %d.%d / %d \t loss: %.3f \t avg_loss: %.3f \t skipped: %d \t %s \t time: %.2fs" %
                #    (mode, epoch, i * args.batch_size, batches_per_epoch * args.batch_size,
                #     current_loss, avg_loss / (i + 1), skipped, log, cur_time - prev_time))
                prev_time = cur_time

        if np.isnan(current_loss):
            print "==> current loss IS NaN. This should never happen :) "
            exit()
        bar.next()
    bar.finish()

    avg_loss /= batches_per_epoch
    print "\n  %s loss = %.5f" % (mode, avg_loss)
    print "confusion matrix:"
    print metrics.confusion_matrix(y_true, y_pred)

    accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)])
    print "accuracy: %.2f percent" % (accuracy * 100.0 / batches_per_epoch / args.batch_size)

    # Decay the learning rate whenever accuracy got worse than last epoch.
    if len(accuracies)>0 and accuracies[-1]>accuracy:
        dmn.lr=dmn.lr*args.learning_rate_decay
    accuracies.append(accuracy)
    return avg_loss, skipped
 def get_stale_files(self, media_files):
     """Return the subset of *media_files* that survived the removal
     check, i.e. files no model file-field references."""
     model_fields = self.get_django_models_with_file_fields()
     progress = Bar('Analyzing media files', max=len(media_files))
     stale = []
     for candidate in media_files:
         if not self.remove_file_if_not_exists_in_db(candidate, model_fields):
             stale.append(candidate)
         progress.next()
     progress.finish()
     return stale
Exemple #28
0
def save_regions(reg_gen, num_regions, window_dims, save_dir):
    """Write the first *num_regions* regions from *reg_gen* as PNG files
    named 000000.png, 000001.png, ... under *save_dir*."""
    progressbar = ProgressBar('Saving regions', max=num_regions)
    for index, img_region in enumerate(itertools.islice(reg_gen, 0, num_regions)):
        fname = os.path.join(save_dir, '{:06d}.png'.format(index))
        sample = img_region.load_cropped_resized_sample(window_dims)
        cv2.imwrite(fname, sample)
        progressbar.next()
    progressbar.finish()
Exemple #29
0
 def resample(self, rm, num_points):
     """Seed roadmap *rm* with the start node, insert *num_points* random
     points drawn from the map area, and return the roadmap."""
     start_node = point.make(self.start.x, self.start.y)
     rm.insert(start_node)
     progress = Bar("Generating Roadmap", max=num_points)
     for _ in xrange(num_points):
         random_point = point.get_random_point(self.width, self.height)
         rm.insert(random_point)
         progress.next()
     progress.finish()
     return rm
Exemple #30
0
def update_api(contract_paths, old_api):
    """Rebuild the combined events/functions API over all contracts.

    For each (name, path) pair, update_contract_api yields that contract's
    event and function APIs; non-empty event maps are merged into a single
    "events" dict, while function APIs are keyed per contract name.
    """
    progress = Bar("Contracts", max=len(contract_paths))
    rebuilt = {"events": {}, "functions": {}}
    for name, path in contract_paths.items():
        events, functions = update_contract_api(name, path, old_api)
        if events:
            rebuilt["events"].update(events)
        rebuilt["functions"][name] = functions
        progress.next()
    progress.finish()
    return rebuilt
Exemple #31
0
    def train(self, epoch):
        """Run one GAN training epoch (generator + discriminator).

        For every batch: update the generator against adversarial, L1 and
        multi-scale attention-mask losses, then update the discriminator
        on real vs. detached-fake pairs. Running averages are pushed to
        TensorBoard at the end of the epoch.

        :param epoch: current epoch index; used for the freeze schedule,
            TensorBoard step computation and scalar logging.
        """
        self.current_epoch = epoch

        # After epoch 10 (when --freeze is set) the attention-weighting part
        # of the generator is frozen and the optimizer is rebuilt over the
        # remaining trainable parameters only.
        if self.args.freeze and epoch > 10:
            self.model.freeze_weighting_of_rasc()
            self.optimizer_G = torch.optim.Adam(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                lr=self.args.lr,
                betas=(0.5, 0.999),
                weight_decay=self.args.weight_decay)

        batch_time = AverageMeter()
        data_time = AverageMeter()
        LoggerLossG = AverageMeter()
        LoggerLossGGAN = AverageMeter()
        LoggerLossGL1 = AverageMeter()

        LoggerLossD = AverageMeter()
        LoggerLossDreal = AverageMeter()
        LoggerLossDfake = AverageMeter()

        lossMask8s = AverageMeter()
        lossMask4s = AverageMeter()
        lossMask2s = AverageMeter()

        # switch to train mode
        self.model.train()
        self.discriminator.train()

        end = time.time()

        bar = Bar('Processing {} '.format(self.args.arch),
                  max=len(self.train_loader))

        for i, (inputs, target) in enumerate(self.train_loader):

            input_image, mask, m2s, m4s, m8s = inputs

            current_index = len(self.train_loader) * epoch + i
            # Patch-GAN targets: all-ones for "real", all-zeros for "fake".
            valid = torch.ones((input_image.size(0), self.patch, self.patch),
                               requires_grad=False).cuda()
            fake = torch.zeros((input_image.size(0), self.patch, self.patch),
                               requires_grad=False).cuda()

            reverse_mask = 1 - mask

            if self.args.gpu:
                input_image = input_image.cuda()
                mask = mask.cuda()
                m2s = m2s.cuda()
                m4s = m4s.cuda()
                m8s = m8s.cuda()
                reverse_mask = reverse_mask.cuda()
                target = target.cuda()
                # BUGFIX: Tensor.cuda() is not in-place, so the previous bare
                # `valid.cuda()` / `fake.cuda()` calls discarded their result.
                # (Both tensors are already created on the GPU above, so this
                # is also kept as an explicit reassignment for clarity.)
                valid = valid.cuda()
                fake = fake.cuda()

            # ---------------
            # Train model
            # --------------

            self.optimizer_G.zero_grad()
            fake_input, mask8s, mask4s, mask2s = self.model(
                torch.cat((input_image, mask), 1))

            pred_fake = self.discriminator(fake_input, input_image)
            loss_GAN = self.criterion_GAN(pred_fake, valid)
            loss_pixel = self.criterion_L1(fake_input, target)  # fake in
            # here two choice: mseLoss or NLLLoss
            masked_loss8s = self.attentionLoss8s(mask8s, m8s)
            masked_loss4s = self.attentionLoss4s(mask4s, m4s)
            masked_loss2s = self.attentionLoss2s(mask2s, m2s)
            loss_G = loss_GAN + 100 * loss_pixel + 90 * masked_loss8s + 90 * masked_loss4s + 90 * masked_loss2s

            loss_G.backward()
            self.optimizer_G.step()

            # Discriminator update: real pair vs. detached fake pair.
            self.optimizer_D.zero_grad()
            pred_real = self.discriminator(target, input_image)
            loss_real = self.criterion_GAN(pred_real, valid)
            pred_fake = self.discriminator(fake_input.detach(), input_image)
            loss_fake = self.criterion_GAN(pred_fake, fake)
            loss_D = 0.5 * (loss_real + loss_fake)
            loss_D.backward()
            self.optimizer_D.step()

            # ---------------------
            #        Logger
            # ---------------------

            LoggerLossGGAN.update(loss_GAN.item(), input_image.size(0))
            LoggerLossGL1.update(loss_pixel.item(), input_image.size(0))
            LoggerLossG.update(loss_G.item(), input_image.size(0))
            # BUGFIX: the real/fake meters were swapped — Dreal now tracks
            # loss_real and Dfake tracks loss_fake.
            LoggerLossDreal.update(loss_real.item(), input_image.size(0))
            LoggerLossDfake.update(loss_fake.item(), input_image.size(0))
            LoggerLossD.update(loss_D.item(), input_image.size(0))
            lossMask8s.update(masked_loss8s.item(), input_image.size(0))
            lossMask4s.update(masked_loss4s.item(), input_image.size(0))
            lossMask2s.update(masked_loss2s.item(), input_image.size(0))

            # ---------------------
            #        Visualize
            # ---------------------

            # Log one batch of images per epoch (batch index 1).
            if i == 1:
                self.writer.add_images('train/Goutput', deNorm(fake_input),
                                       current_index)
                self.writer.add_images('train/target', deNorm(target),
                                       current_index)
                self.writer.add_images('train/input', deNorm(input_image),
                                       current_index)
                self.writer.add_images('train/mask', mask.repeat((1, 3, 1, 1)),
                                       current_index)
                self.writer.add_images('train/attention2s',
                                       mask2s.repeat(1, 3, 1, 1),
                                       current_index)
                self.writer.add_images('train/attention4s',
                                       mask4s.repeat(1, 3, 1, 1),
                                       current_index)
                self.writer.add_images('train/attention8s',
                                       mask8s.repeat(1, 3, 1, 1),
                                       current_index)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.2f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss D: {loss_d:.4f} | Loss G: {loss_g:.4f} | Loss L1: {loss_l1:.6f} '.format(
                batch=i + 1,
                size=len(self.train_loader),
                data=data_time.val,
                bt=batch_time.val,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss_d=LoggerLossD.avg,
                loss_g=LoggerLossGGAN.avg,
                loss_l1=LoggerLossGL1.avg)
            bar.next()

        bar.finish()
        self.writer.add_scalar('train/loss/GAN', LoggerLossGGAN.avg, epoch)
        self.writer.add_scalar('train/loss/D', LoggerLossD.avg, epoch)
        self.writer.add_scalar('train/loss/L1', LoggerLossGL1.avg, epoch)
        self.writer.add_scalar('train/loss/G', LoggerLossG.avg, epoch)
        self.writer.add_scalar('train/loss/Dreal', LoggerLossDreal.avg, epoch)
        self.writer.add_scalar('train/loss/Dfake', LoggerLossDfake.avg, epoch)

        self.writer.add_scalar('train/loss_Mask8s', lossMask8s.avg, epoch)
        self.writer.add_scalar('train/loss_Mask4s', lossMask4s.avg, epoch)
        self.writer.add_scalar('train/loss_Mask2s', lossMask2s.avg, epoch)
Exemple #32
0
def verify_stats_cb():
    """Consume messages until stats_cb reports a positive app_offset.

    Relies on module globals: `topic`, `bootstrap_servers`, `error_cb`,
    `with_progress` and `good_stats_cb_result` (set by the callback).
    Raises if consumption stalls for 20s or a Kafka error occurs.
    """
    def stats_cb(stats_json_str):
        # Called by librdkafka every statistics.interval.ms with a JSON blob;
        # flips the module-level flag once partition 0 shows progress.
        global good_stats_cb_result
        stats_json = json.loads(stats_json_str)
        if topic in stats_json['topics']:
            app_offset = stats_json['topics'][topic]['partitions']['0'][
                'app_offset']
            if app_offset > 0:
                print("# app_offset stats for topic %s partition 0: %d" %
                      (topic, app_offset))
                good_stats_cb_result = True

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': uuid.uuid1(),
        'session.timeout.ms': 6000,
        'error_cb': error_cb,
        'stats_cb': stats_cb,
        # Short interval so the callback fires quickly during the test.
        'statistics.interval.ms': 200,
        'auto.offset.reset': 'earliest'
    }

    c = confluent_kafka.Consumer(conf)
    c.subscribe([topic])

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming',
                  max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    # Loop until the stats callback reports success (or max_msgcnt reached).
    while not good_stats_cb_result:
        # Consume until EOF or error

        msg = c.poll(timeout=20.0)
        if msg is None:
            raise Exception(
                'Stalled at %d/%d message, no new messages for 20s' %
                (msgcnt, max_msgcnt))

        if msg.error():
            raise confluent_kafka.KafkaException(msg.error())

        bytecnt += len(msg)
        msgcnt += 1

        # Advance the progress bar in coarse steps to keep overhead low.
        if bar is not None and (msgcnt % 10000) == 0:
            bar.next(n=10000)

        if msgcnt == 1:
            t_first_msg = time.time()
        if msgcnt >= max_msgcnt:
            break

    if bar is not None:
        bar.finish()

    # t_first_msg is guaranteed set here: msgcnt > 0 implies the
    # msgcnt == 1 branch above ran at least once.
    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    c.close()
Exemple #33
0
    def _train_one_epoch(self):
        """Train the pose model for one epoch over self.train_data.

        Per batch: build Gaussian heatmap targets, forward the (stacked
        hourglass) model, average the per-stack loss, back-propagate, and
        track PCKh via soft-argmax keypoint predictions. Every 500 steps a
        sample input/target/output triple is written to TensorBoard.
        """
        bar = Bar('Processing', max=len(self.train_data))
        for step, (data, label) in enumerate(self.train_data):

            # Heatmap kernel size may shrink as training progresses
            # (schedule controlled by hm_kernel_size and last_epoch).
            self.sigma = hm_kernel_size(self.hm_type,
                                        self.last_epoch,
                                        threshold=4)
            target = gene_heatmap(label, self.sigma)
            inputs = Variable(data)
            target = Variable(t.from_numpy(target))
            if len(self.params.gpus) > 0:
                inputs = inputs.cuda()
                target = target.type(t.FloatTensor).cuda()

            # forward
            score = self.model(inputs)
            loss = 0

            # stack hourglass: average the loss over all intermediate stacks
            for s in range(len(score)):
                loss += self.criterion(score[s], target)
            loss = loss / len(score)

            # simple pose res
            # loss = self.criterion(score[1], target)

            # backward
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step(None)

            # meters update
            self.loss_meter.add(loss.item())

            # evaluation: calculate PCKh
            # NOTE(review): predictions come from the LAST stack's output;
            # coords are flattened to (-1, 2) — assumes 2-D keypoints.
            predictions = spatial_soft_argmax2d(score[len(score) - 1], 1000,
                                                False).cpu().numpy().reshape(
                                                    -1, 2)
            targetcoors = label.numpy().reshape(-1, 2)
            steppckh, steperr = evalPCKh(predictions,
                                         targetcoors,
                                         threshold=50,
                                         alpha=0.2)

            # tensorboard show
            if step % 500 == 0:
                # Collapse all joint heatmaps into one image, clipped to [0,1].
                target_shows = t.sum(target[0], 0)
                target_shows[target_shows > 1] = 1
                self.writer.add_image('train/input', inputs[0],
                                      self.last_epoch)
                self.writer.add_image('train/taget',
                                      target_shows,
                                      self.last_epoch,
                                      dataformats='HW')
                self.writer.add_image('train/output',
                                      t.sum(score[1][0], 0),
                                      self.last_epoch,
                                      dataformats='HW')

            bar.suffix = 'Train: [%(index)d/%(max)d] | Epoch: [{0}/{1}]| Loss: {loss:6f} | PCKh: {pckh:4f} | AveErr: {err:.2f} pixel |'.format(
                self.last_epoch,
                self.params.max_epoch,
                loss=loss,
                pckh=steppckh,
                err=steperr)
            bar.next()
        bar.finish()
Exemple #34
0
def prefetch_test(opt):
    """Run detector/tracker evaluation over a dataset with prefetched batches.

    Optionally seeds detections from a pre-computed results JSON
    (opt.load_results), runs the detector on every image, collects results
    keyed by image id, saves them if requested, and invokes the dataset's
    own evaluation.
    """
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    Dataset = dataset_factory[opt.test_dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    Logger(opt)

    split = 'val' if not opt.trainval else 'test'
    dataset = Dataset(opt, split)
    detector = Detector(opt)

    if opt.load_results != '':
        load_results = json.load(open(opt.load_results, 'r'))
        # Loaded detections of ignored categories are kept but neutralized
        # by setting their score to -1.
        for img_id in load_results:
            for k in range(len(load_results[img_id])):
                if load_results[img_id][k][
                        'class'] - 1 in opt.ignore_loaded_cats:
                    load_results[img_id][k]['score'] = -1
    else:
        load_results = {}

    data_loader = torch.utils.data.DataLoader(PrefetchDataset(
        opt, dataset, detector.pre_process),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True)

    results = {}
    num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
    bar = Bar('{}'.format(opt.exp_id), max=num_iters)
    time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'track']
    avg_time_stats = {t: AverageMeter() for t in time_stats}
    # If reusing loaded results wholesale, skip inference entirely by
    # zeroing num_iters below.
    if opt.use_loaded_results:
        for img_id in data_loader.dataset.images:
            results[img_id] = load_results['{}'.format(img_id)]
        num_iters = 0
    for ind, (img_id, pre_processed_images) in enumerate(data_loader):
        if ind >= num_iters:
            break
        # At the first frame of each video, reset the tracker and seed it
        # with prior detections for that frame when available.
        if opt.tracking and ('is_first_frame' in pre_processed_images):
            if '{}'.format(int(img_id.numpy().astype(
                    np.int32)[0])) in load_results:
                pre_processed_images['meta']['pre_dets'] = \
                    load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))]
            else:
                print()
                print('No pre_dets for',
                      int(img_id.numpy().astype(np.int32)[0]),
                      '. Use empty initialization.')
                pre_processed_images['meta']['pre_dets'] = []
            detector.reset_tracking()
            print('Start tracking video',
                  int(pre_processed_images['video_id']))
        # Public-detection protocol: use externally provided detections
        # for the current frame instead of the model's own.
        if opt.public_det:
            if '{}'.format(int(img_id.numpy().astype(
                    np.int32)[0])) in load_results:
                pre_processed_images['meta']['cur_dets'] = \
                    load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))]
            else:
                print('No cur_dets for',
                      int(img_id.numpy().astype(np.int32)[0]))
                pre_processed_images['meta']['cur_dets'] = []

        ret = detector.run(pre_processed_images)
        results[int(img_id.numpy().astype(np.int32)[0])] = ret['results']

        # NOTE(review): the suffix is set on the Bar CLASS (Bar.suffix),
        # not the instance — preserved as written.
        Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
            ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
        # for t in avg_time_stats:
        #     avg_time_stats[t].update(ret[t])
        #     Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format(
        #         t, tm = avg_time_stats[t])
        if opt.print_iter > 0:
            if ind % opt.print_iter == 0:
                print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
        else:
            bar.next()
    bar.finish()
    if opt.save_results:
        print(
            'saving results to',
            opt.save_dir + '/save_results_{}{}.json'.format(
                opt.test_dataset, opt.dataset_version))
        json.dump(
            _to_list(copy.deepcopy(results)),
            open(
                opt.save_dir + '/save_results_{}{}.json'.format(
                    opt.test_dataset, opt.dataset_version), 'w'))
    dataset.run_eval(results, opt.data_dir, opt.save_dir)
    def fit(self, train_domain, num_epochs, patience, optimizer, train_dir,
            dev_dir):
        """
        Trains the model.

        Builds the computation graph from the training vocabulary, then runs
        up to num_epochs of per-instance multi-task updates with early
        stopping on main-task dev accuracy.

        :param train_domain: the domain used for training
        :param num_epochs: the max number of epochs the model should be trained
        :param patience: the patience to use for early stopping
        :param optimizer: the optimizer that should be used
        :param train_dir: the directory containing the training files
        :param dev_dir: the directory containing the development files
        """
        print("Reading training data from %s..." % train_dir, flush=True)
        print("Tasks: %s" % self.task_names)
        train_X, train_Y, _, _, word2id, char2id, task2t2i = get_data(
            [train_domain], self.task_names, data_dir=train_dir, train=True)

        # get the development data of the same domain
        # (reuses the training vocabulary mappings so indices are shared)
        dev_X, dev_Y, org_X, org_Y, _, _, _ = get_data(
            [train_domain], self.task_names, word2id, char2id, task2t2i,
            data_dir=dev_dir, train=False)
        print('Length of training data:', len(train_X), flush=True)
        print('Length of validation data:', len(dev_X), flush=True)

        # store mappings of words and tags to indices
        self.set_indices(word2id, char2id, task2t2i)
        num_words = len(self.word2id)
        num_chars = len(self.char2id)

        print('Building the computation graph...', flush=True)
        self.predictors, self.char_rnn, self.wembeds, self.cembeds = \
            self.build_computation_graph(num_words, num_chars)

        if optimizer == SGD:
            trainer = dynet.SimpleSGDTrainer(self.model)
        elif optimizer == ADAM:
            trainer = dynet.AdamTrainer(self.model)
        else:
            raise ValueError('%s is not a valid optimizer.' % optimizer)

        train_data = list(zip(train_X, train_Y))

        num_iterations = 0
        num_epochs_no_improvement = 0
        best_dev_acc = 0

        print('Training model with %s for %d epochs and patience of %d.'
              % (optimizer, num_epochs, patience))
        for epoch in range(num_epochs):
            print('', flush=True)
            bar = Bar('Training epoch %d/%d...' % (epoch+1, num_epochs),
                      max=len(train_data), flush=True)

            # keep track of the # of updates, total loss, and total # of
            # predicted instances per task
            task2num_updates = {task: 0 for task in self.task_names}
            task2total_loss = {task: 0.0 for task in self.task_names}
            task2total_predicted = {task: 0.0 for task in self.task_names}
            total_loss = 0.0
            total_penalty = 0.0
            total_predicted = 0.0
            random.shuffle(train_data)

            # for every instance, we optimize the loss of the corresponding task
            for (word_indices, char_indices), task2label_id_seq in train_data:
                # get the concatenated word and char-based features for every
                # word in the sequence
                features = self.get_word_char_features(word_indices, char_indices)
                for task, y in task2label_id_seq.items():
                    # index of the placeholder ('_') label, or -1 when the
                    # tag set has no placeholder (then no gold is skipped)
                    placeholder_idx = self.task2tag2idx[task]['_'] if '_' in self.task2tag2idx[task] else -1
                    if task in [POS, CHUNK, NER, SRL]:
                        output, penalty = self.predict(features, task, train=True)
                        neg_logs = [pick_neg_log(pred, gold) for pred, gold in zip(output, y)]
                    elif task in [STUTT, SAARB, TSVET, VUAMC, STUTT_M, SAARB_M, TSVET_M, VUAMC_M]:
                        output, penalty = self.predict(features, task, train=True)
                        # get the index for the placeholder label; if there is no placeholder in the data, set to -1, so that every sample is regarded
                        neg_logs = [pick_neg_log(pred, gold) for pred, gold in zip(output, y) if gold != placeholder_idx]
                        # if the sentence does not contain any literal or metaphor samples, skip it; do not consider it for training
                        if not neg_logs:
                            continue
                    else:
                        raise NotImplementedError('Task %s has not been '
                                                  'implemented yet.' % task)

#                    labels = {v: k for k,v in self.task2tag2idx[task].items()}
                    loss = dynet.esum(neg_logs)
#                    loss = dynet.esum([pick_neg_log(pred, gold) for pred, gold
#                                       in zip(output, y)])
                    lv = loss.value()
                    # sum the loss and the subspace constraint penalty
                    combined_loss = loss + dynet.parameter(
                        self.constraint_weight_param, update=False) * penalty
                    total_loss += lv
                    total_penalty += penalty.value()
                    assert len(output) == len(y)
                    total_predicted += len([1 for gold in y if gold != placeholder_idx])
                    task2total_loss[task] += lv
                    task2total_predicted[task] += len([1 for gold in y if gold != placeholder_idx])
                    task2num_updates[task] += 1

                    # back-propagate through the combined loss
                    combined_loss.backward()
                    trainer.update()
                bar.next()
                num_iterations += 1

            print("\nEpoch %d. Total loss: %.3f. Total penalty: %.3f. Losses: "
                  % (epoch, total_loss / total_predicted,
                     total_penalty / total_predicted), end='', flush=True)
            for task in task2total_loss.keys():
                # guard against division by zero for tasks with no predictions
                if task2total_predicted[task] == 0:
                    print('%s: %.3f/%.3f. ' % (task, task2total_loss[task],
                                          task2total_predicted[task]),
                          end='', flush=True)
                else:
                    print('%s: %.3f. ' % (task, task2total_loss[task] /
                                          task2total_predicted[task]),
                          end='', flush=True)
            print('', flush=True)

            # evaluate after every epoch
            dev_acc = self.evaluate(dev_X, dev_Y, org_X=None, mode='nope')

            if dev_acc > best_dev_acc:
                print('Main task %s dev acc %.4f is greater than best dev acc '
                      '%.4f...' % (self.main_task, dev_acc, best_dev_acc),
                      flush=True)
                best_dev_acc = dev_acc
                num_epochs_no_improvement = 0
                print('Saving model to directory %s...' % self.model_dir,
                      flush=True)
                self.save()
                self.evaluate(dev_X, dev_Y, org_X=org_X, mode='dev')
            else:
                print('Main task %s dev acc %.4f is lower than best dev acc '
                      '%.4f...' % (self.main_task, dev_acc, best_dev_acc),
                      flush=True)
                num_epochs_no_improvement += 1
            if num_epochs_no_improvement == patience:
                print('Early stopping...', flush=True)
                print('Loading the best performing model from %s...'
                      % self.model_dir, flush=True)
                self.model.load(self.model_file)
                break
def getSingleTraining(file):
    """Build per-affordance single-example binary training sets (HDF5).

    From a results file path, locates the affordance descriptor CSV, then
    for each affordance with enough samples extracts one positive point
    cloud voxel around the annotated test point plus one uniform-noise
    negative example, saves both to an .h5 file, and visualizes each pair
    with matplotlib. Finally writes an id->affordance names index file.

    NOTE(review): relies on module globals `n_points`, `max_rad`,
    `TRAIN_EXAMPLES` and helpers (load_pcd_data, load_ply_data, getVoxel,
    sample_cloud, save_h5) defined elsewhere in the file.
    """
    # The file name encodes metadata as '_'-separated tokens; the
    # descriptor id and scene name are recovered positionally.
    path = os.path.abspath(file)
    pos = path.rfind('/')
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    scene_name = tokens[2]
    scene_name = path[:pos] + '/' + scene_name + '_d.pcd'
    file_descriptor = path[:pos] + '/tmp' + descriptor_id + '.csv'
    labels = np.genfromtxt(file_descriptor,
                           dtype='str',
                           skip_header=1,
                           delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    fileId = tokens[-1]
    tokens = fileId.split('.')
    fileId = tokens[0]
    # print(fileId)
    # # Need only those affordances that have
    # # over 128 good predictions in this result file

    # res_data_file=path[:pos]+'/'+fileId+'_goodPointsX.pcd'
    # res_points_file=path[:pos]+'/'+fileId+'_goodPoints.pcd'

    # data=load_pcd_data(res_data_file,cols=None)
    # #print(data.shape)
    # points,real_c_data=load_pcd_data_binary(res_points_file)
    # #real_c_data=load_pcd_data(res_points_file,cols=(3,),dataType=np.uint32)
    # #real_c_data=np.array(colors[:,-1],dtype=np.int32)
    # red=np.array((real_c_data>>16)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    # green=np.array((real_c_data>>8)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    # blue=np.array((real_c_data)& 0x0000ff,dtype=np.uint8).reshape(-1,1)

    # real_c_data=np.concatenate((red,green,blue),axis=1)

    # perPoint=np.sum(real_c_data,axis=1)
    # bounds=np.cumsum(perPoint)
    # #print(bounds)
    # howMany=np.zeros((labels.shape[0],1),dtype=np.int32)
    # all_data=np.zeros((data.shape[0],6))

    # for i in range(all_data.shape[0]):
    # 	point_id=np.nonzero(bounds>i)[0][0]
    # 	all_data[i,:3]=points[point_id,:]
    # 	all_data[i,3:]=data[i,:3]

    # for i in range(labels.shape[0]):
    # 	success=np.nonzero(all_data[:,3]==i)[0]
    # 	#success2=np.nonzero(all_data[success,2]>0.2)[0]
    # 	howMany[i]=success.size

    # ids_target=np.nonzero(howMany>n_samples)[0]
    # print('Real found: %d'%ids_target.size)
    # print(ids_target)
    #sys.exit()

    # Precomputed per-affordance counts and file lists replace the
    # commented-out on-the-fly computation above.
    new_c = np.genfromtxt('filtered_counts2.csv', delimiter=',', dtype='int')
    with open('file_lists2.csv', 'r') as f:
        reader = csv.reader(f)
        new_n = list(reader)

    # NOTE(review): `points` below is never used (n_points global is used
    # instead) — kept as in the original.
    samples = 32
    points = 4096
    # Only keep affordances with at least `samples` good examples.
    ids_target = np.nonzero(new_c >= samples)[0]
    print('Actually using %d affordances' % (ids_target.size))

    fig = plt.figure()
    plt.ion()
    ax = fig.add_subplot(121, projection='3d')
    ax2 = fig.add_subplot(122, projection='3d')
    unique_scenes = dict()
    k = 10
    #ax.hold(False)
    if k > 1:
        bar = Bar('Creating original single example training dataset',
                  max=ids_target.shape[0])
        for i in range(ids_target.shape[0]):
            interaction = ids_target[i]
            path_to_data = os.path.abspath('../data')
            name = path_to_data + '/affordances/binaryOc_AffordancesDataset_train' + str(
                interaction) + '_' + str(TRAIN_EXAMPLES) + '.h5'
            # Skip affordances whose dataset was already generated.
            if os.path.exists(name):
                continue
            #find training data
            aff_dir = labels[interaction, 0]
            query_object = labels[interaction, 2]
            data_file = path[:pos] + "/" + aff_dir + "/ibs_full_" + labels[
                interaction, 1] + "_" + query_object + ".txt"
            with open(data_file) as f:
                content = f.readlines()
                # you may also want to remove whitespace characters like `\n` at the end of each line
            content = [x.strip() for x in content]
            # Line 0 holds the scene file name; line 8 the test point coords.
            scene_file = content[0].split(":")[1]
            tmp = content[8].split(":")[1]
            datapoint = tmp.split(',')
            test_point = np.expand_dims(np.asarray(
                [float(x) for x in datapoint]),
                                        axis=0)
            data_file = path[:pos] + "/" + aff_dir + "/" + scene_file
            # Resolve the scene file extension (.pcd/.ply may be omitted).
            if '.pcd' in scene_file or '.ply' in scene_file:
                if os.path.exists(data_file):
                    data_file = data_file
            else:
                try_data_file = data_file + '.ply'
                if os.path.exists(try_data_file):
                    #print(try_data_file)
                    data_file = try_data_file
                #maybe pcd extension missing
                else:
                    try_data_file = data_file + '.pcd'
                    if os.path.exists(try_data_file):
                        data_file = try_data_file
            # if scene_file not in unique_scenes:
            # 	unique_scenes[scene_file]=interaction
            # else:
            # 	continue
            if '.pcd' in data_file:
                cloud_training = load_pcd_data(data_file)
            else:
                cloud_training = load_ply_data(data_file)
            data = np.zeros((2, n_points, 3), dtype=np.float32)
            data_labels = np.zeros((2, 1), dtype=np.int32)
            boundingBoxDiag = np.linalg.norm(
                np.min(cloud_training, 0) - np.max(cloud_training, 0))
            #print('%s Diagonal %f Points %d'%(scene_file,boundingBoxDiag,cloud_training.shape[0]))
            #sample a voxel with rad from test-point
            kdt = BallTree(cloud_training, leaf_size=5, metric='euclidean')
            voxel_ids = getVoxel(test_point, max_rad, kdt)
            voxel = cloud_training[voxel_ids, :]
            sample = sample_cloud(voxel, n_points)
            sample_cloud_training = sample_cloud(cloud_training, n_points * 2)
            #genereate a negative example with noise around test_point
            low = test_point[0, 0] - max_rad
            high = test_point[0, 0] + max_rad
            tmp1 = (high - low) * np.random.random_sample(
                (n_points, 1)) + (low)
            low = test_point[0, 1] - max_rad
            high = test_point[0, 1] + max_rad
            tmp2 = (high - low) * np.random.random_sample(
                (n_points, 1)) + (low)
            low = test_point[0, 2] - max_rad
            high = test_point[0, 2] + max_rad
            tmp3 = (high - low) * np.random.random_sample(
                (n_points, 1)) + (low)
            negative_cloud_training = np.concatenate((tmp1, tmp2, tmp3),
                                                     axis=1)
            # Center both examples on the test point; label 0 = positive,
            # label 1 = negative.
            data[0, ...] = sample - test_point
            data_labels[0, ...] = np.zeros((1, 1), dtype=np.int32)
            data[1, ...] = negative_cloud_training - test_point
            data_labels[1, ...] = np.ones((1, 1), dtype=np.int32)
            #name=path_to_data+'/affordances/binaryOc_AffordancesDataset_train'+str(interaction)+'_'+str(TRAIN_EXAMPLES)+'.h5'
            #print(name)
            save_h5(name, data, data_labels, 'float32', 'uint8')
            ax.scatter(sample_cloud_training[:, 0],
                       sample_cloud_training[:, 1],
                       sample_cloud_training[:, 2],
                       s=1,
                       c='b')
            ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2], s=3, c='b')
            ax2.scatter(negative_cloud_training[:, 0],
                        negative_cloud_training[:, 1],
                        negative_cloud_training[:, 2],
                        s=3,
                        c='r')
            plt.pause(1)
            plt.draw()
            ax.clear()
            ax2.clear()
            bar.next()
        bar.finish()
    # Write the id -> "directory-object" name index for all used affordances.
    name = '../data/affordances/names.txt'
    with open(name, "w") as text_file:
        for i in range(ids_target.shape[0]):
            text_file.write(
                "%d:%s-%s\n" %
                (i, labels[ids_target[i], 0], labels[ids_target[i], 2]))
def computeResultStats(descriptor_id):
    """Tally, per affordance, how many detected points lie above the height
    cutoff across every result file of *descriptor_id*, and write the table
    to ``Counts_<descriptor_id>.csv``.

    If a module-level ``some_counts`` already exists it is reused instead of
    recomputing; otherwise each result file also gets a ``*_newData.csv``
    cache holding the merged point/affordance data.
    """
    file_ids = getResults(descriptor_id)
    print('Found %d actual results' % (len(file_ids)))
    path = os.path.abspath(result_dirs[0])
    print(path)
    file_descriptor = path + '/tmp' + str(descriptor_id) + '.csv'
    labels = np.genfromtxt(file_descriptor,
                           dtype='str',
                           skip_header=1,
                           delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    counts = np.zeros((labels.shape[0], 1), dtype=np.int32)
    countsFile = "Counts_" + str(descriptor_id) + ".csv"
    if 'some_counts' not in globals():
        # No cached counts: scan every result file.
        bar = Bar('Creating new data', max=len(file_ids))
        for file_id in file_ids:
            # Result clouds produced for this file id.
            some_results = file_ids[file_id] + file_id + "_goodPointsX.pcd"
            some_results_points = file_ids[
                file_id] + file_id + "_goodPoints.pcd"
            newDataName = file_ids[file_id] + file_id + "_newData.csv"
            try:
                data, _ = load_pcd_data_binary(some_results)
                points, real_c_data = load_pcd_data_binary(some_results_points)
            except Exception:
                print('Encoding error in %s' % (file_ids[file_id] + file_id))
                # BUG FIX: the original placed bar.next() *after* `continue`,
                # so the progress bar never advanced for unreadable files.
                bar.next()
                continue

            # Per-point detection counts are packed into the RGB channels of
            # the colour field: unpack, sum, and turn the running total into
            # slice bounds for the flat data array.
            red = np.array((real_c_data >> 16) & 0x0000ff,
                           dtype=np.uint8).reshape(-1, 1)
            green = np.array((real_c_data >> 8) & 0x0000ff,
                             dtype=np.uint8).reshape(-1, 1)
            blue = np.array((real_c_data) & 0x0000ff,
                            dtype=np.uint8).reshape(-1, 1)
            real_c_data = np.concatenate((red, green, blue), axis=1)
            perPoint = np.sum(real_c_data, axis=1)
            bounds = np.cumsum(perPoint)

            # Only keep detections at least 0.3 above the lowest point.
            minZ = np.min(points[:, 2])

            # Merge: columns 0-2 the 3D point, columns 3-5 the affordance data.
            all_data = np.zeros((data.shape[0], 6))
            for i in range(bounds.shape[0]):
                start_id = bounds[i - 1] if i > 0 else 0
                end_id = bounds[i]
                all_data[start_id:end_id, :3] = points[i, :]
                all_data[start_id:end_id, 3:] = data[start_id:end_id, :3]

            valid_ids = np.nonzero(all_data[:, Z] >= (minZ + 0.3))[0]
            data = all_data[valid_ids, :]
            np.savetxt(newDataName, data, delimiter=",", fmt='%1.6f')
            counts_tmp = np.bincount(data[:, A_ID].astype(int),
                                     minlength=counts.shape[0])
            counts_tmp = np.expand_dims(counts_tmp, axis=1)
            counts += counts_tmp
            bar.next()
        bar.finish()
    else:
        counts = some_counts
    with open(countsFile, "w") as text_file:
        for i in range(labels.shape[0]):
            # counts[i, 0]: index the scalar explicitly — passing a size-1
            # array to %d is deprecated under newer NumPy.
            text_file.write("%d,%s-%s,%d\n" %
                            (i, labels[i, 0], labels[i, 2], counts[i, 0]))
Exemple #38
0
    def validate(self) -> None:
        """Run a full evaluation pass over ``self.test_loader``.

        Clears the evaluation accumulators, then for every batch runs the
        model on precomputed features and collects predicted/target 3D
        joints, predicted vertices and target SMPL parameters (as CPU numpy
        arrays) for later metric computation. Logs the final progress line.
        """
        self.model.eval()

        start = time.time()

        summary_string = ''

        bar = Bar('Validation', fill='#', max=len(self.test_loader))

        # Reset the per-epoch accumulators before collecting new results.
        if self.evaluation_accumulators is not None:
            for k,v in self.evaluation_accumulators.items():
                self.evaluation_accumulators[k] = []

        # H3.6M joint regressor: maps SMPL vertices to evaluation joints.
        J_regressor = torch.from_numpy(np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()

        for i, target in enumerate(self.test_loader):

            # video = video.to(self.device)
            move_dict_to_device(target, self.device)

            # <=============
            with torch.no_grad():
                # assumes target['features'] holds precomputed per-frame
                # features consumed directly by the model — TODO confirm
                inp = target['features']
                
                # preds = self.model(inp, J_regressor=J_regressor, refiner = self.refiner)
                preds = self.model(inp, J_regressor=J_regressor)

                # convert to 14 keypoint format for evaluation
                # if self.use_spin:
                n_kp = preds[-1]['kp_3d'].shape[-2]
                # Flatten (batch, seq, ...) into one sample axis and move to
                # CPU numpy; 6890 = SMPL vertex count, 85 = SMPL theta size.
                pred_j3d = preds[-1]['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
                target_j3d = target['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
                pred_verts = preds[-1]['verts'].view(-1, 6890, 3).cpu().numpy()
                target_theta = target['theta'].view(-1, 85).cpu().numpy()

                ######################## vis #####################
                # renderer = smpl_renderer.SMPL_Renderer(image_size = 400, camera_mode="look_at")
                # target_pose = target_theta[:,3:75]
                # pred_pose = preds[-1]['theta'][:,:,3:75].squeeze()
                # renderer.render_pose_vid(torch.tensor(target_pose), out_file_name = "output/gt{:02d}.mp4".format(i), random_camera = 2, random_shape=False)
                # renderer.render_pose_vid(torch.tensor(pred_pose), out_file_name = "output/ref{:02d}.mp4".format(i), random_camera = 2, random_shape=False)

                ######################## vis #####################


                self.evaluation_accumulators['pred_verts'].append(pred_verts)
                self.evaluation_accumulators['target_theta'].append(target_theta)

                self.evaluation_accumulators['pred_j3d'].append(pred_j3d)
                self.evaluation_accumulators['target_j3d'].append(target_j3d)

                # Release GPU memory between batches.
                del target, preds
                torch.cuda.empty_cache()
            # =============>

            batch_time = time.time() - start

            summary_string = f'({i + 1}/{len(self.test_loader)}) | batch: {batch_time * 10.0:.4}ms | ' \
                             f'Total: {bar.elapsed_td} | ETA: {bar.eta_td:}'

            bar.suffix = summary_string
            bar.next()

        bar.finish()

        logger.info(summary_string)
            model.zero_grad()
            predictions = model(text_chunk)

            # for each bptt size we have the same batch_labels
            loss = criterion(predictions, batch_label)
            bptt_loss += loss.item()

            # do back propagation for bptt steps in time
            loss.backward()
            optimizer.step()
            # after doing back prob, detach rnn state in order to implement TBPTT (truncated backpropagation through time startegy)
            # now rnn_state was detached and chain of gradeints was broken
            model.repackage_rnn_state()

        bar.next()
        epoch_loss += bptt_loss
    bar.finish()
    # mean epoch loss
    epoch_loss = epoch_loss / len(train_iter)

    time_elapsed = datetime.now() - start_time

    # progress
    bar = Bar(f'Validation Epoch {e}/{epoch}', max=len(valid_iter))
    # evaluation loop
    model.eval()
    with torch.no_grad():
        for batch_idx, batch in enumerate(valid_iter):
            # print(f'batch_idx={batch_idx}')
            batch_text = batch.text[0]  #batch.text is a tuple
Exemple #40
0
            if metric < best_metric:
                best_metric = metric
                best_image = image
                best_triangle = list(triangle)
    if not best_image == None:
        # print "Best image (hard): %d (%d)" % (best_image, best_connections)
        # print "  ", best_triangle
        best_image.tris.append(best_triangle)
        good_tris += 1
        done = True
    if not done:
        # print "failed triangle"
        failed_tris += 1
    count += 1
    if count % update_steps == 0:
        bar.next(update_steps)
bar.finish()

# NOTE(review): Python 2 print statements — this snippet predates Python 3.
print "good tris =", good_tris
print "failed tris =", failed_tris

# write out an ac3d file
name = args.project + "/sba3d.ac"
# NOTE(review): `f` is deliberately left open here — the AC3D object bodies
# for each image appear to be written further below (outside this excerpt),
# so do not convert this to a `with` block without moving the later writes in.
f = open(name, "w")
f.write("AC3Db\n")
trans = 0.0
# Single default material shared by all objects in the file.
f.write(
    "MATERIAL \"\" rgb 1 1 1  amb 0.6 0.6 0.6  emis 0 0 0  spec 0.5 0.5 0.5  shi 10  trans %.2f\n"
    % (trans))
f.write("OBJECT world\n")
f.write("kids " + str(len(proj.image_list)) + "\n")
Exemple #41
0
def validate(loader, model, criterion, netType, debug, flip):
    """Run one validation pass over *loader* for a 68-landmark model.

    Sums the intermediate-supervision loss over every stack output, computes
    NME-based accuracy at threshold 0.07, and collects per-sample predicted
    landmarks. Optionally averages with predictions on horizontally flipped
    inputs and shows debug heatmaps.

    Returns:
        (average loss, average accuracy, predictions tensor, AUC).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acces = AverageMeter()
    end = time.time()

    # One (68, 2) landmark set per dataset sample.
    predictions = torch.Tensor(loader.dataset.__len__(), 68, 2)

    model.eval()
    gt_win, pred_win = None, None
    bar = Bar('Validating', max=len(loader))
    all_dists = torch.zeros((68, loader.dataset.__len__()))
    for i, (inputs, target, meta) in enumerate(loader):
        data_time.update(time.time() - end)

        input_var = torch.autograd.Variable(inputs.cuda())
        # BUG FIX: `async` is a reserved keyword since Python 3.7; PyTorch
        # renamed the argument to `non_blocking` in 0.4.
        target_var = torch.autograd.Variable(target.cuda(non_blocking=True))

        output = model(input_var)
        score_map = output[-1].data.cpu()

        if flip:
            # Average with the prediction on the horizontally flipped input.
            flip_input_var = torch.autograd.Variable(
                torch.from_numpy(shufflelr(
                    inputs.clone().numpy())).float().cuda())
            flip_output_var = model(flip_input_var)
            flip_output = flip_back(flip_output_var[-1].data.cpu())
            score_map += flip_output

        # Intermediate supervision: sum the criterion over all stack outputs.
        loss = 0
        for o in output:
            loss += criterion(o, target_var)
        acc, batch_dists = accuracy(score_map, target.cpu(), idx, thr=0.07)
        all_dists[:, i * args.val_batch:(i + 1) * args.val_batch] = batch_dists

        pts, pts_img = get_preds_fromhm(score_map, meta['center'],
                                        meta['scale'], meta['reference_scale'])
        preds = pts_img
        for n in range(score_map.size(0)):
            predictions[meta['index'][n], :, :] = preds[n, :, :]

        if debug:
            gt_batch_img = batch_with_heatmap(inputs, target)
            pred_batch_img = batch_with_heatmap(inputs, score_map)
            if not gt_win or not pred_win:
                plt.subplot(121)
                gt_win = plt.imshow(gt_batch_img)
                plt.subplot(122)
                pred_win = plt.imshow(pred_batch_img)
            else:
                gt_win.set_data(gt_batch_img)
                pred_win.set_data(pred_batch_img)
            plt.pause(.05)
            plt.draw()

        # BUG FIX: `loss.data[0]` fails on 0-dim tensors in PyTorch >= 0.5;
        # `.item()` is the supported scalar accessor since 0.4.
        losses.update(loss.item(), inputs.size(0))
        acces.update(acc[0], inputs.size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        bar.suffix = '({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc: .4f}'.format(
            batch=i + 1,
            size=len(loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=acces.avg)
        bar.next()

    bar.finish()
    mean_error = torch.mean(all_dists)
    auc = calc_metrics(all_dists)  # this is auc of predicted maps and target.
    # BUG FIX: the scraped source read "[email protected]" — an email-protection
    # artifact; restored to the accuracy threshold used above (0.07).
    print("=> Mean Error: {:.2f}, AUC@0.07: {} based on maps".format(
        mean_error * 100., auc))
    sys.stdout.flush()
    return losses.avg, acces.avg, predictions, auc
Exemple #42
0
def train(loader,
          model,
          criterion,
          optimizer,
          netType,
          debug=False,
          flip=False):
    """Run one training epoch over *loader*.

    Sums the intermediate-supervision loss over every stack output and
    backpropagates it; optionally augments with horizontally flipped inputs
    and shows debug heatmaps.

    Returns:
        (average loss, average accuracy) for the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acces = AverageMeter()

    model.train()
    end = time.time()

    gt_win, pred_win = None, None
    bar = Bar('Training', max=len(loader))
    for i, (inputs, target) in enumerate(loader):
        data_time.update(time.time() - end)

        input_var = torch.autograd.Variable(inputs.cuda())
        # BUG FIX: `async` is a reserved keyword since Python 3.7; PyTorch
        # renamed the argument to `non_blocking` in 0.4.
        target_var = torch.autograd.Variable(target.cuda(non_blocking=True))

        if debug:
            gt_batch_img = batch_with_heatmap(inputs, target)
            if not gt_win or not pred_win:
                plt.subplot(121)
                gt_win = plt.imshow(gt_batch_img)
            else:
                gt_win.set_data(gt_batch_img)
            plt.pause(.05)
            plt.draw()

        output = model(input_var)
        score_map = output[-1].data.cpu()

        if flip:
            # Augment with the prediction on the horizontally flipped input.
            flip_input_var = torch.autograd.Variable(
                torch.from_numpy(shufflelr(
                    inputs.clone().numpy())).float().cuda())
            flip_output_var = model(flip_input_var)
            flip_output = flip_back(flip_output_var[-1].data.cpu())
            score_map += flip_output

        # Intermediate supervision: sum the criterion over all stack outputs.
        loss = 0
        for o in output:
            loss += criterion(o, target_var)
        acc, _ = accuracy(score_map, target.cpu(), idx, thr=0.07)

        # BUG FIX: `loss.data[0]` fails on 0-dim tensors in PyTorch >= 0.5;
        # `.item()` is the supported scalar accessor since 0.4.
        losses.update(loss.item(), inputs.size(0))
        acces.update(acc[0], inputs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc: .4f}'.format(
            batch=i + 1,
            size=len(loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=acces.avg)
        bar.next()

    bar.finish()

    return losses.avg, acces.avg
def main():
    """Insert a CoNLL-formatted target-language text into the database,
    aligned segment by segment with an already-inserted source text.

    Command line: <conll file> <text id> <db name> <sl table> <tl table>.

    Raises:
        MissingTextError: if the given text id is not in text_ids.
        AlignMismatch: if segment counts differ between source and target.
    """
    #Get command line input:
    try:
        conllinputfile = sys.argv[1]
        text_id = sys.argv[2]
        dbname = sys.argv[3]
        sl_dbtablename = sys.argv[4]
        tl_dbtablename = sys.argv[5]
    # BUG FIX: the original bare `except:` also swallowed KeyboardInterrupt
    # and SystemExit; only a missing argument should trigger the usage text.
    except IndexError:
        print('''Usage: {} 
        <path to target language conll formatted text>
        <text id of the inserted source language text>
        <database name>
        <source language database table name>
        <target language database table name>
        '''.format(sys.argv[0]))
        sys.exit(0)

    #Connect to the database
    con = psycopg(dbname, 'juho')
    #read the conll data
    with open(conllinputfile, 'r') as f:
        conllinput = f.read()

    #fetch the id of the pair that is already inserted
    text_id = con.FetchQuery(
        "SELECT id FROM {} WHERE id = %s".format('text_ids'), (text_id, ))
    try:
        text_id = text_id[0][0]
    except IndexError:
        raise MissingTextError('No such id in the text_ids table')

    #Get all the align ids that were inserted with the first file
    align_ids = con.FetchQuery(
        "SELECT DISTINCT align_id FROM {} WHERE text_id = %s order by align_id"
        .format(sl_dbtablename), (text_id, ))

    # Split the translation file into aligned segments according to the !!!! -notation
    splitpattern = re.compile(
        r"\d+\t![^\n]+\n\n?\d+\t![^\n]+\n\n?\d+\t![^\n]+\n\n?\d+\t![^\n]+\n\n")
    alignsegments = re.split(splitpattern, conllinput)
    #Filter out empty align segments
    alignsegments = TrimList(alignsegments)

    #Test that same number of segments
    if len(alignsegments) != len(align_ids):
        raise AlignMismatch(
            'The number of segments differs from the number in the source text: {}/{}'
            .format(len(alignsegments), len(align_ids)))

    #Get the current maximum indices:
    sentence_id = GetLastValue(
        con.FetchQuery(
            "SELECT max(sentence_id) FROM {}".format(tl_dbtablename)))
    #Insert a new entry in the translation_ids table
    translator = input('Give the author for this translation:\n')
    con.query(
        "INSERT INTO translation_ids (translator, sourcetext_id) VALUES(%s, %s)",
        (
            translator,
            text_id,
        ),
        commit=True)
    translation_id = GetLastValue(
        con.FetchQuery(
            "SELECT max(id) FROM translation_ids WHERE sourcetext_id = %(sid)s",
            {'sid': text_id}))

    #Initialize variales for db insertion
    rowlist = list()
    bar = Bar('Preparing the data for insertion into the database',
              max=len(alignsegments))

    #================================================================================
    for idx, align_id in enumerate(align_ids):
        align_id = align_id[0]
        segment = alignsegments[idx]
        #Split each segment into lines (line=word with all the morphological and syntactic information)
        words = segment.splitlines()
        sentence_id += 1
        for word in words:
            #read all the information about the word
            if word == '':
                #empty lines are sentence breaks
                sentence_id += 1
            else:
                columns = word.split('\t')
                if len(columns) < 7:
                    #If an empty segment encountered
                    print('Note: an empty segment encountered at align_id {}'.
                          format(align_id))
                    rowlist.append({
                        'align_id': align_id,
                        'sentence_id': sentence_id,
                        'text_id': text_id,
                        'translation_id': translation_id,
                        'tokenid': 1,
                        'token': 'EMPTYSEGMENT',
                        'lemma': 'EMPTYSEGMENT',
                        'pos': 'EMPTYSEGMENT',
                        'feat': 'EMPTYSEGMENT',
                        'head': 0,
                        'deprel': 'EMPTY'
                    })
                else:
                    #If this is a word with information, initialize a new row.
                    #Column layout differs between the Finnish and Russian
                    #CoNLL variants, hence the two branches.
                    if sl_dbtablename == 'fi_conll':
                        rowlist.append({
                            'align_id': align_id,
                            'sentence_id': sentence_id,
                            'text_id': text_id,
                            'translation_id': translation_id,
                            'tokenid': columns[0],
                            'token': columns[1],
                            'lemma': columns[2],
                            'pos': columns[4],
                            'feat': columns[5],
                            'head': columns[6],
                            'deprel': columns[7]
                        })

                    elif sl_dbtablename == 'ru_conll':
                        rowlist.append({
                            'align_id': align_id,
                            'sentence_id': sentence_id,
                            'text_id': text_id,
                            'translation_id': translation_id,
                            'tokenid': columns[0],
                            'token': columns[1],
                            'lemma': columns[2],
                            'pos': columns[4],
                            'feat': columns[6],
                            'head': columns[8],
                            'deprel': columns[10]
                        })
        bar.next()
    #================================================================================

    bar.finish()
    print('\nInserting to database, this might take a while...')
    con.BatchInsert(tl_dbtablename, rowlist)
    print('Done. Inserted {} rows.'.format(con.cur.rowcount))
Exemple #44
0
    def validate(self, epoch):
        """Evaluate the model for one epoch on ``self.val_loader``.

        Tracks PSNR/SSIM of the harmonized output against the target plus
        the three multi-scale attention-mask losses, logs the averages to
        TensorBoard, and stores the mean PSNR in ``self.metric``.
        """
        self.current_epoch = epoch
        batch_time = AverageMeter()
        data_time = AverageMeter()
        psnres = AverageMeter()
        ssimes = AverageMeter()
        lossMask8s = AverageMeter()
        lossMask4s = AverageMeter()
        lossMask2s = AverageMeter()

        # switch to evaluate mode
        self.model.eval()

        end = time.time()
        bar = Bar('Processing {} '.format(self.args.arch),
                  max=len(self.val_loader))

        with torch.no_grad():
            for i, (inputs, target) in enumerate(self.val_loader):

                input_image, mask, m2s, m4s, m8s = inputs

                # NOTE(review): removed the unused GAN tensors (`valid`,
                # `fake`), `current_index` and `reverse_mask` that the
                # original computed but never read (including the discarded
                # `valid.cuda()`/`fake.cuda()` no-op calls).

                if self.args.gpu:
                    input_image = input_image.cuda()
                    mask = mask.cuda()
                    m2s = m2s.cuda()
                    m4s = m4s.cuda()
                    m8s = m8s.cuda()
                    target = target.cuda()

                # 32,64,128
                output, mask8s, mask4s, mask2s = self.model(
                    torch.cat((input_image, mask), 1))

                output = deNorm(output)
                target = deNorm(target)

                # Multi-scale attention-mask supervision losses.
                masked_loss8s = self.attentionLoss8s(mask8s, m8s)
                masked_loss4s = self.attentionLoss4s(mask4s, m4s)
                masked_loss2s = self.attentionLoss2s(mask2s, m2s)

                ## psnr and  ssim calculator.
                mse = self.criterion_GAN(output, target)
                psnr = 10 * log10(1 / mse.item())
                ssim = pytorch_ssim.ssim(output, target)

                psnres.update(psnr, input_image.size(0))
                ssimes.update(ssim, input_image.size(0))
                lossMask8s.update(masked_loss8s.item(), input_image.size(0))
                lossMask4s.update(masked_loss4s.item(), input_image.size(0))
                lossMask2s.update(masked_loss2s.item(), input_image.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.2f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | SSIM: {ssim:.4f} | PSNR: {psnr:.4f}'.format(
                    batch=i + 1,
                    size=len(self.val_loader),
                    data=data_time.val,
                    bt=batch_time.val,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    ssim=ssimes.avg,
                    psnr=psnres.avg)
                bar.next()
        bar.finish()

        self.writer.add_scalar('val/SSIM', ssimes.avg, epoch)
        self.writer.add_scalar('val/PSNR', psnres.avg, epoch)
        self.writer.add_scalar('train/loss_Mask8s', lossMask8s.avg, epoch)
        self.writer.add_scalar('train/loss_Mask4s', lossMask4s.avg, epoch)
        self.writer.add_scalar('train/loss_Mask2s', lossMask2s.avg, epoch)

        self.metric = psnres.avg
Exemple #45
0
from progress.bar import Bar

# Category names and the colour used to draw each category's boxes
# (index-aligned: categories[i] is drawn in colors[i]).
categories = ["Ball", "Vase", "Corona", "Red", "Crown", "Grey_white"]
colors = ["lime", "white", "orange", "red", "cyan", "pink"]

files = set(glob("data/test/*.jpg"))

#with open("data/test.txt") as f:
#    files = [l.rstrip("\n") for l in f]

print(files)
#files = [l.replace("data/data", "pre_data") for l in files]

# Walk every test image, read its side-car label file and convert the
# normalized coordinates to pixel positions.
progress = Bar('Processing', max=len(files))
for f in files:
    progress.next()
    source = Image.open(f)
    draw = ImageDraw.Draw(source)

    img_width, img_height = source.size

    # Label file shares the image's basename with a .txt extension.
    # NOTE(review): presumably YOLO-format lines (class cx cy w h, all
    # normalized) — verify; the visible excerpt only uses class/cx/cy.
    data = open(f.replace(".jpg", ".txt"))
    for d in data:
        params = d.split(" ")
        params = [float(p) for p in params]
        params[0] = int(params[0])

        color = colors[params[0]]
        x_center = int(params[1] * img_width)
        y_center = int(params[2] * img_height)
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """Run one epoch of training or evaluation.

    In 'train' mode backpropagates the stacked-hourglass loss; otherwise
    averages predictions with the flipped input and collects final keypoint
    predictions.

    Returns:
        ({'Loss': avg, 'Acc': avg}, preds) where preds is empty for 'train'.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    preds = []

    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)

    # `inp` rather than `input`: avoid shadowing the builtin.
    for i, (inp, target, _, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(inp).float().cuda(opt.GPU)
        target_var = torch.autograd.Variable(target[0]).float().cuda(opt.GPU)
        output = model(input_var)

        if opt.DEBUG >= 2:
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            img = (inp[0].numpy().transpose(1, 2, 0) * 256).astype(
                np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showAllImg(pause=True)

        # Intermediate supervision across every hourglass stack.
        loss = criterion(output[0], target_var)
        for k in range(1, opt.nStack):
            loss += criterion(output[k], target_var)

        Loss.update(loss.item(), inp.size(0))
        Acc.update(
            Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                     (target_var.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Evaluation: average the heatmaps with those of the flipped
            # input, then decode the final keypoint predictions.
            input_ = inp.cpu().numpy()
            input_[0] = Flip(input_[0]).copy()
            inputFlip_var = torch.autograd.Variable(
                torch.from_numpy(input_).view(1, input_.shape[1], ref.inputRes,
                                              ref.inputRes)).float().cuda(
                                                  opt.GPU)
            outputFlip = model(inputFlip_var)
            outputFlip = ShuffleLR(
                Flip((outputFlip[opt.nStack -
                                 1].data).cpu().numpy()[0])).reshape(
                                     1, ref.nJoints, ref.outputRes,
                                     ref.outputRes)
            output_ = old_div(
                ((output[opt.nStack - 1].data).cpu().numpy() + outputFlip), 2)
            preds.append(
                finalPreds(output_, meta['center'], meta['scale'],
                           meta['rotate'])[0])

        # BUG FIX: the original assigned `Bar.suffix`, mutating the Bar
        # *class* shared by every progress bar; set it on this instance.
        bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(
            epoch,
            i,
            nIters,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=Loss,
            Acc=Acc,
            split=split)
        bar.next()

    bar.finish()
    return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
def createDataSet(file):
    """Build HDF5 training datasets from one affordance-detection result file.

    Parses the result/point clouds next to *file*, keeps affordances with
    more than ``n_samples`` detections above z=0.3, samples their top-scored
    points, extracts 1024 negative example voxels, and finally writes the
    (optionally rotation-augmented) positive voxel dataset.

    Relies on module-level globals: n_samples, n_points, n_orientations,
    max_rad, plus the load/save and sampling helpers.
    """
    path = os.path.abspath(file)
    pos = path.rfind('/')
    # NOTE(review): assumes an underscore-delimited result filename with the
    # descriptor id at token 6 and the scene name at token 2 — TODO confirm.
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    scene_name = tokens[2]
    scene_name = path[:pos] + '/' + scene_name + '_d.pcd'
    file_descriptor = path[:pos] + '/tmp' + descriptor_id + '.csv'
    labels = np.genfromtxt(file_descriptor,
                           dtype='str',
                           skip_header=1,
                           delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    fileId = tokens[-1]
    tokens = fileId.split('.')
    fileId = tokens[0]
    print(fileId)
    res_data_file = path[:pos] + '/' + fileId + '_goodPointsX.pcd'
    res_points_file = path[:pos] + '/' + fileId + '_goodPoints.pcd'

    data = load_pcd_data(res_data_file, cols=None)
    #print(data.shape)
    points = load_pcd_data(res_points_file, cols=(0, 1, 2))
    real_c_data = load_pcd_data(res_points_file,
                                cols=(3, ),
                                dataType=np.uint32)
    # Per-point detection counts are packed into the RGB channels of the
    # colour field: unpack the three bytes and sum them.
    #real_c_data=np.array(colors[:,-1],dtype=np.int32)
    red = np.array((real_c_data >> 16) & 0x0000ff,
                   dtype=np.uint8).reshape(-1, 1)
    green = np.array((real_c_data >> 8) & 0x0000ff,
                     dtype=np.uint8).reshape(-1, 1)
    blue = np.array((real_c_data) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)

    real_c_data = np.concatenate((red, green, blue), axis=1)

    perPoint = np.sum(real_c_data, axis=1)
    # Cumulative counts act as slice bounds mapping flat data rows to points.
    bounds = np.cumsum(perPoint)
    #print(bounds)
    howMany = np.zeros((labels.shape[0], 1), dtype=np.int32)
    # Merged table: columns 0-2 the 3D point, columns 3-5 the detection data.
    all_data = np.zeros((data.shape[0], 6))

    for i in range(all_data.shape[0]):
        point_id = np.nonzero(bounds > i)[0][0]
        all_data[i, :3] = points[point_id, :]
        all_data[i, 3:] = data[i, :3]

    # Count, per affordance id (column 3), detections above z=0.3.
    for i in range(labels.shape[0]):
        success = np.nonzero(all_data[:, 3] == i)[0]
        success2 = np.nonzero(all_data[success, 2] > 0.3)[0]
        howMany[i] = success2.size

    ids_target = np.nonzero(howMany > n_samples)[0]
    print('Real found: %d' % ids_target.size)
    if n_orientations > 1:
        name = 'AffordancesDataset_augmented_names.txt'
    else:
        name = 'AffordancesDataset_names.txt'
    with open(name, "w") as text_file:
        for i in range(ids_target.shape[0]):
            text_file.write(
                "%d:%s-%s\n" %
                (i, labels[ids_target[i], 0], labels[ids_target[i], 2]))
    #
    #print(labels[ids_target,1:])

    # Keep the n_samples highest-scored (column 5) points per kept affordance.
    all_points = np.zeros((ids_target.size, n_samples, 3))
    all_points_score = np.zeros((ids_target.size, n_samples))
    for i in range(ids_target.shape[0]):
        #get the 3D point for the response
        success = np.nonzero((all_data[:, 3] == ids_target[i])
                             & (all_data[:, 2] > 0.3))[0]
        sorted_ids = np.argsort(all_data[success, 5])
        print(
            'Sampling for %s %s in %d points(%f,%f)' %
            (labels[ids_target[i], 0], labels[ids_target[i], 2], success.size,
             np.max(all_data[success, 5]), np.min(all_data[success, 5])))
        sorted_ids = sorted_ids[::-1]
        for j in range(n_samples):
            all_points[i, j, :] = all_data[success[sorted_ids[j]], :3]
            all_points_score[i, j] = all_data[success[sorted_ids[j]], 5]
        #print('Min %f max %f'%(all_points_score[i,0],all_points_score[i,-1]))
    labels_d = np.arange(ids_target.size)
    print(
        'Sampled points maxZ %f minZ %f' % (np.max(all_points[:, :, 2].reshape(
            1, -1)), np.min(all_points[:, :, 2].reshape(1, -1))))

    #sys.exit()

    if n_orientations > 1:
        name = 'dataPointsAffordances_augmented.h5'
    else:
        name = 'dataPointsAffordances.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name, all_points, labels_d, 'float32', 'uint8')

    #get dense cloud
    dense_sceneCloud = pypcd.PointCloud.from_path(scene_name).pc_data
    pc_array = np.array([[x, y, z] for x, y, z in dense_sceneCloud])

    #generate pointclouds that were not detected to test against single example training
    good_points_file = path[:pos] + '/' + fileId + '_goodPointsIds.pcd'
    sampled_points_file = path[:pos] + '/' + fileId + '_samplePointsIds.pcd'
    sampled_ids = np.sort(
        load_pcd_data(sampled_points_file, cols=(0, ), dataType=np.int32))
    good_ids = np.sort(
        load_pcd_data(good_points_file, cols=(0, ), dataType=np.int32))
    # Sampled points that produced no detection = negative candidates.
    non_affordance = np.setdiff1d(np.arange(sampled_ids.shape[0]), good_ids)
    sampled_points_file = path[:pos] + '/' + fileId + '_samplePoints.pcd'
    sampled_points = load_pcd_data(sampled_points_file, cols=(0, 1, 2))
    np.random.shuffle(non_affordance)
    print('Getting 1024 negative examples ')
    #shuffle negative examples ids
    bar = Bar('Processing', max=1024)
    negative_examples = np.zeros((1024, n_points, 3), dtype=np.float32)
    for i in range(1024):
        point = pc_array[non_affordance[i], ...]
        voxel = getVoxel(point, max_rad, pc_array)
        minP = np.min(voxel, 0)
        maxP = np.max(voxel, 0)
        dist = np.linalg.norm(maxP - minP, axis=0) / 2
        print('RAD %f rad %f estimation %f' %
              (dist, max_rad, max_rad * np.sqrt(3)))
        sample = sample_cloud(voxel, n_points)
        negative_examples[i, ...] = sample
        bar.next()
    bar.finish()
    # Negative class is labelled 100 to stay clear of real affordance ids.
    negative_labels = 100 * np.ones((1024, 1), dtype=np.uint8)
    print('Got %d negative examples' % (negative_examples.shape[0]))
    print(negative_examples[0, 0, :])
    name = 'AffordancesDataset_negatives.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name, negative_examples, negative_labels, 'float32', 'uint8')
    #sys.exit()

    print('Sampling actual voxels from %s of %d points' %
          (scene_name, pc_array.shape[0]))
    dataSet_data = np.zeros(
        (all_points.shape[0] * all_points.shape[1] * n_orientations, n_points,
         3),
        dtype=np.float32)
    dataSet_labels = np.zeros(
        (all_points.shape[0] * all_points.shape[1] * n_orientations, 1),
        dtype=np.uint8)
    print(dataSet_data.shape)
    count = 0
    #data_type 0->centered
    data_type = 1
    #extract voxels and pointclouds for dataset
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # NOTE(review): Axes.hold() was removed in matplotlib >= 3.0; this call
    # fails on modern matplotlib.
    ax.hold(False)
    for aff in range(all_points.shape[0]):
        print('Training examples for %s %s' %
              (labels[ids_target[aff], 0], labels[ids_target[aff], 2]))
        bar = Bar('Processing', max=all_points.shape[1])
        for n_sample in range(all_points.shape[1]):
            point = all_points[aff, n_sample, :].reshape(3, -1)
            #print(point.shape)
            voxel = getVoxel(point, max_rad, pc_array)
            if voxel.shape[0] < n_points:
                # NOTE(review): `aVoxel` is undefined anywhere in scope —
                # this branch raises NameError; probably meant `voxel` (or a
                # padded resample of it). Left as-is pending confirmation.
                sample = aVoxel
            else:
                sample = sample_cloud(voxel, n_points)
            if data_type == 0:
                centered_sample = sample - point
            else:
                centered_sample = sample
            #rotate this voxels n_orientations around Z (up)
            for j in range(n_orientations):
                rotated_voxel = rotate_point_cloud_by_angle(
                    np.expand_dims(centered_sample, axis=0),
                    j * 2 * np.pi / n_orientations).squeeze()
                dataSet_data[count, ...] = rotated_voxel
                dataSet_labels[count] = labels_d[aff]
                count += 1
            if n_sample == 0:
                ax.scatter(rotated_voxel[:, 0],
                           rotated_voxel[:, 1],
                           rotated_voxel[:, 2],
                           s=3)
                plt.pause(0.2)
                plt.draw()
            bar.next()
        bar.finish()
    if n_orientations > 1:
        name = 'AffordancesDataset_augmented.h5'
    else:
        name = 'AffordancesDataset.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name, dataSet_data, dataSet_labels, 'float32', 'uint8')
Exemple #48
0
def validation(model, val_loader, epoch, writer):
    """Validate `model` for one epoch and log metrics and images to TensorBoard.

    Evaluates three prediction heads -- part-level, half-body ('hb') and
    full-body ('fb') segmentation -- accumulating pixel accuracy and a
    confusion histogram (for mIoU) over the whole loader.

    Relies on module-level helpers/globals not visible in this block:
    `args`, `gather`, `batch_pix_accuracy`, `fast_hist`, `per_class_iu`,
    `inv_preprocess`, `decode_predictions`.

    Args:
        model: network whose forward pass returns three head outputs; the
            last element of each head's output list is the final logits.
        val_loader: DataLoader yielding (image, target, hlabel, flabel, _).
        epoch: epoch index, used as the TensorBoard global step.
        writer: TensorBoard SummaryWriter.

    Returns:
        Tuple (pixAcc, mIoU) for the part-level head.
    """
    # set evaluate mode
    model.eval()

    # Running pixel-accuracy accumulators for the three heads.
    total_correct, total_label = 0, 0
    total_correct_hb, total_label_hb = 0, 0
    total_correct_fb, total_label_fb = 0, 0
    # Confusion histograms used for per-class IoU / mIoU.
    hist = np.zeros((args.num_classes, args.num_classes))
    hist_hb = np.zeros((args.hbody_cls, args.hbody_cls))
    hist_fb = np.zeros((args.fbody_cls, args.fbody_cls))

    # Iterate over data.
    bar = Bar('Processing {}'.format('val'), max=len(val_loader))
    bar.check_tty = False
    for idx, batch in enumerate(val_loader):
        image, target, hlabel, flabel, _ = batch
        image, target, hlabel, flabel = image.cuda(), target.cuda(
        ), hlabel.cuda(), flabel.cuda()
        with torch.no_grad():
            h, w = target.size(1), target.size(2)
            outputs = model(image)
            # Collect per-GPU outputs (DataParallel-style) onto device 0.
            outputs = gather(outputs, 0, dim=0)
            # Upsample each head's final logits to the label resolution.
            preds = F.interpolate(input=outputs[0][-1],
                                  size=(h, w),
                                  mode='bilinear',
                                  align_corners=True)
            preds_hb = F.interpolate(input=outputs[1][-1],
                                     size=(h, w),
                                     mode='bilinear',
                                     align_corners=True)
            preds_fb = F.interpolate(input=outputs[2][-1],
                                     size=(h, w),
                                     mode='bilinear',
                                     align_corners=True)
            # Every 50 batches, log an image / label / prediction grid.
            if idx % 50 == 0:
                img_vis = inv_preprocess(image, num_images=args.save_num)
                label_vis = decode_predictions(target.int(),
                                               num_images=args.save_num,
                                               num_classes=args.num_classes)
                pred_vis = decode_predictions(torch.argmax(preds, dim=1),
                                              num_images=args.save_num,
                                              num_classes=args.num_classes)

                # visual grids
                img_grid = torchvision.utils.make_grid(
                    torch.from_numpy(img_vis.transpose(0, 3, 1, 2)))
                label_grid = torchvision.utils.make_grid(
                    torch.from_numpy(label_vis.transpose(0, 3, 1, 2)))
                pred_grid = torchvision.utils.make_grid(
                    torch.from_numpy(pred_vis.transpose(0, 3, 1, 2)))
                writer.add_image('val_images', img_grid,
                                 epoch * len(val_loader) + idx + 1)
                writer.add_image('val_labels', label_grid,
                                 epoch * len(val_loader) + idx + 1)
                writer.add_image('val_preds', pred_grid,
                                 epoch * len(val_loader) + idx + 1)

            # pixelAcc
            correct, labeled = batch_pix_accuracy(preds.data, target)
            correct_hb, labeled_hb = batch_pix_accuracy(preds_hb.data, hlabel)
            correct_fb, labeled_fb = batch_pix_accuracy(preds_fb.data, flabel)
            # mIoU
            hist += fast_hist(preds, target, args.num_classes)
            hist_hb += fast_hist(preds_hb, hlabel, args.hbody_cls)
            hist_fb += fast_hist(preds_fb, flabel, args.fbody_cls)

            total_correct += correct
            total_correct_hb += correct_hb
            total_correct_fb += correct_fb
            total_label += labeled
            total_label_hb += labeled_hb
            total_label_fb += labeled_fb
            # np.spacing(1) guards against division by zero when no pixels
            # have been labeled yet.
            pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
            IoU = round(np.nanmean(per_class_iu(hist)) * 100, 2)
            pixAcc_hb = 1.0 * total_correct_hb / (np.spacing(1) +
                                                  total_label_hb)
            IoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2)
            pixAcc_fb = 1.0 * total_correct_fb / (np.spacing(1) +
                                                  total_label_fb)
            IoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2)
            # plot progress
            bar.suffix = '{} / {} | pixAcc: {pixAcc:.4f}, mIoU: {IoU:.4f} |' \
                         'pixAcc_hb: {pixAcc_hb:.4f}, mIoU_hb: {IoU_hb:.4f} |' \
                         'pixAcc_fb: {pixAcc_fb:.4f}, mIoU_fb: {IoU_fb:.4f}'.format(idx + 1, len(val_loader),
                                                                                    pixAcc=pixAcc, IoU=IoU,
                                                                                    pixAcc_hb=pixAcc_hb, IoU_hb=IoU_hb,
                                                                                    pixAcc_fb=pixAcc_fb, IoU_fb=IoU_fb)
            bar.next()

    print('\n per class iou part: {}'.format(per_class_iu(hist) * 100))
    print('per class iou hb: {}'.format(per_class_iu(hist_hb) * 100))
    print('per class iou fb: {}'.format(per_class_iu(hist_fb) * 100))

    mIoU = round(np.nanmean(per_class_iu(hist)) * 100, 2)
    mIoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2)
    mIoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2)

    # NOTE(review): pixAcc/pixAcc_hb/pixAcc_fb are loop-local; an empty
    # val_loader would leave them undefined here -- confirm loaders are
    # always non-empty.
    writer.add_scalar('val_pixAcc', pixAcc, epoch)
    writer.add_scalar('val_mIoU', mIoU, epoch)
    writer.add_scalar('val_pixAcc_hb', pixAcc_hb, epoch)
    writer.add_scalar('val_mIoU_hb', mIoU_hb, epoch)
    writer.add_scalar('val_pixAcc_fb', pixAcc_fb, epoch)
    writer.add_scalar('val_mIoU_fb', mIoU_fb, epoch)
    bar.finish()

    return pixAcc, mIoU
def sampleFromFile(affordance,
                   list_of_files,
                   number_of_samples,
                   pointsPerCloud=4096):
    """Sample (point, voxel-cloud) training pairs for one affordance.

    Randomly assigns each of the `number_of_samples` draws to a file in
    `list_of_files`, then, per file, walks that file's detections in
    descending SCORE order, keeping points whose surrounding voxel (radius
    `max_rad` in the scene cloud) has at least `pointsPerCloud / 4` points.
    Each kept voxel is downsampled (or padded with copies of the query
    point) to exactly `pointsPerCloud` points and centered on the query
    point.

    Relies on module-level globals/helpers: `max_rad`, `A_ID`, `SCORE`,
    `getVoxel`, `sample_cloud`, `load_pcd_data_binary`, `BallTree`, `Bar`.

    Args:
        affordance: integer affordance id matched against column A_ID.
        list_of_files: base paths; "<base>_newData.csv" must exist and the
            matching scene .pcd is resolved from sibling files.
        number_of_samples: total number of samples to draw.
        pointsPerCloud: points per output voxel cloud.

    Returns:
        (dataPoints, dataClouds) with shapes (number_of_samples, 6) and
        (number_of_samples, pointsPerCloud, 3); a pair of empty arrays if an
        input cloud is missing or any file runs out of usable points.
    """
    files_to_sample = np.random.randint(len(list_of_files),
                                        size=(1, number_of_samples))
    # How many samples each file must contribute.
    repeated = np.bincount(files_to_sample[0, :], minlength=len(list_of_files))
    actually_sample_files = np.nonzero(repeated)[0]
    # Bug fix: `np.float` was removed in NumPy 1.24 -- use the builtin
    # float (same float64 dtype).
    dataPoints = np.empty((number_of_samples, 6), dtype=float)
    dataClouds = np.empty((number_of_samples, pointsPerCloud, 3),
                          dtype=np.float32)
    start_id = 0
    actually_sampled = 0
    outOfPoints = False
    bar = Bar('Sampling ', max=number_of_samples)
    for i in range(actually_sample_files.size):
        file = list_of_files[actually_sample_files[i]] + "_newData.csv"
        # Resolve the scene point-cloud file that this results file belongs
        # to; naming conventions differ per dataset layout.
        if "space/" in file:
            #Need to search for the exact file
            pos_id = list_of_files[actually_sample_files[i]].rfind('/') + 1
            target_file_id = list_of_files[actually_sample_files[i]][pos_id:]
            path_to_scene = file[:
                                 pos_id] + 'All_affordances_*_' + target_file_id + '.pcd'
            someFile = glob.glob(path_to_scene)
            tokens = someFile[0].split('_')
            cloud_file = list_of_files[
                actually_sample_files[i]][:pos_id] + tokens[2]
            if "real" in tokens[2]:
                cloud_file = cloud_file + ".pcd"
            else:
                cloud_file = cloud_file + "_d.pcd"
        else:
            pos_id = list_of_files[actually_sample_files[i]].rfind('/') + 1
            target_file_id = list_of_files[actually_sample_files[i]][pos_id:]
            if "DATA" in file[:pos_id]:
                path_to_scene = file[:pos_id] + '*_clean.pcd'
                someFile = glob.glob(path_to_scene)
                cloud_file = someFile[0]
            else:
                path_to_scene = file[:
                                     pos_id] + 'All_affordances_*_' + target_file_id + '.pcd'
                someFile = glob.glob(path_to_scene)
                tokens = someFile[0].split('_')
                cloud_file = list_of_files[
                    actually_sample_files[i]][:pos_id] + tokens[2] + '.pcd'
        sample_from_file = repeated[actually_sample_files[i]]
        data = np.genfromtxt(file, delimiter=",", dtype='float32')
        # Rows for this affordance, sorted by SCORE, best first.
        target_ids = np.nonzero(data[:, A_ID].astype(int) == affordance)[0]
        sorted_subset = np.argsort(data[target_ids, SCORE])
        sorted_subset = sorted_subset[::-1]
        j = 0
        k = 0
        complete_sample = False
        if not os.path.exists(cloud_file):
            print('No input cloud %s' % (cloud_file))
            return np.empty((0, 6)), np.empty((0, 0, 0))
        cloud, _ = load_pcd_data_binary(cloud_file)
        kdt = BallTree(cloud, leaf_size=5, metric='euclidean')
        while not complete_sample:
            #take points until conplete set
            dataPoints[start_id + j, :] = data[target_ids[sorted_subset[k]], :]
            point = dataPoints[start_id + j, :3]
            voxel_ids = getVoxel(point, max_rad, kdt)
            voxel = cloud[voxel_ids, :]
            actual_voxel_size = voxel.shape[0]
            if actual_voxel_size < (pointsPerCloud / 4):
                #bad point, get a new one
                if k == 0:
                    print("\n File %s" % (cloud_file))
                outputText = "Voxel " + str(
                    voxel.shape[0]) + " " + str(k) + "/" + str(
                        sorted_subset.shape[0])
                print(outputText, end='\r')
                k += 1
                if k >= sorted_subset.shape[0]:
                    outOfPoints = True
                    print('Exhausted File')
                    break
            else:
                if actual_voxel_size >= pointsPerCloud:
                    sample = sample_cloud(voxel, pointsPerCloud)
                else:
                    # Too few points in the voxel: pad with copies of the
                    # query point so the cloud has a fixed size.
                    print('padding')
                    padding = point + np.zeros(
                        (pointsPerCloud - actual_voxel_size, 3),
                        dtype=np.float32)
                    sample = np.concatenate((padding, voxel), axis=0)
                #center cloud
                dataClouds[start_id + j, ...] = sample - point
                j += 1
            if j == sample_from_file:
                complete_sample = True
        if not outOfPoints:
            start_id += sample_from_file
            actually_sampled += sample_from_file
            bar.next(sample_from_file)
        else:
            break
    bar.finish()
    # All-or-nothing contract: callers get empty arrays on any shortfall.
    if outOfPoints or actually_sampled != number_of_samples:
        return np.empty((0, 6)), np.empty((0, 0, 0))
    else:
        return dataPoints, dataClouds
Exemple #50
0
def mostImportantFormat(output_path, pages):
    """OCR report pages and append the relevant rows to 'out_text0.txt'.

    Each page image in `pages` is saved as a JPG under `output_path`,
    thresholded to black/white for better OCR, run through Tesseract, and
    parsed line by line: the "Gender/Age" line and the header "Name" line
    are written once (first page only); after the header, every line that
    `isValidRow` accepts is written. The temporary page images are deleted
    at the end.

    Args:
        output_path: directory for intermediate page JPGs; it is removed
            and re-created after processing.
        pages: iterable of page images (e.g. pdf2image output) supporting
            .save(path, 'JPEG').
    """
    # Counter to store images of each page of PDF to image
    image_counter = 1

    # Iterate through all the pages stored above
    for page in pages:
        # Declaring filename for each page of PDF as JPG
        filename = os.path.join(output_path,
                                "page_" + str(image_counter) + ".jpg")

        # Save the image of the page in system
        page.save(filename, 'JPEG')

        # Increment the counter to update filename
        image_counter += 1

    # Variable to get count of total number of pages
    filelimit = image_counter - 1

    # Creating a text file to write the output
    outfile = "out_text0.txt"

    #Progress Bar
    bar = Bar('Processing', max=filelimit)

    # Bug fix: the file was previously opened without a context manager and
    # leaked if OCR raised; `with` guarantees it is closed.
    with open(outfile, "a") as f:
        # Iterate from 1 to total number of pages
        for i in range(1, filelimit + 1):

            filepath = os.path.join(output_path, "page_" + str(i) + ".jpg")

            # load the original image
            image = cv2.imread(filepath)

            # convert the image to black and white for better OCR
            ret, thresh1 = cv2.threshold(image, 120, 255, cv2.THRESH_BINARY)

            # pytesseract image to string to get results
            text = str(pytesseract.image_to_string(thresh1, config='--psm 6'))

            # Split the entire text into lines and store in a list
            arr = text.split("\n")

            # Flag to check when to start parsing lines
            start_flag = False
            for each in arr:

                # If the line has gender/age texts then print it
                if "Gender/Age" in each and i == 1:
                    f.write(each + "\n")
                    continue
                # If the line has Name then print it and start parsing lines from here onwards
                if "Name" in each and "Value" not in each:
                    start_flag = True
                    if i == 1:
                        f.write(each + "\n")
                    continue
                # If the line is a valid row print it, else move to next
                if start_flag:
                    row_arr = each.split()
                    if isValidRow(row_arr):
                        f.write(each + "\n")

            # Increment the terminal progress bar
            bar.next()
    try:
        #Delete all created images
        shutil.rmtree(output_path)
        os.mkdir(output_path)
    except Exception as e:
        # Best-effort cleanup: report, but don't fail the whole run.
        print("Error occurred while deleting images : " + str(e))

    bar.finish()
    def run_epoch(self, phase, epoch, data_loader, rank):
        """Run one full pass over `data_loader` for training or validation.

        Args:
            phase: 'train' (optimizer steps are taken) or 'val' (eval mode;
                may also write per-frame tracking results, see below).
            epoch: current epoch number, used for logging and save points.
            data_loader: yields batch dicts; 'fpath'/'prev_fpath' entries
                stay on CPU, every other entry is moved to self.args.device.
            rank: distributed rank; only rank 0 prints and writes results.

        Returns:
            (ret, results): dict of averaged loss statistics plus elapsed
            minutes under 'time', and the `results` dict (left empty here).
        """
        model_with_loss = self.model_with_loss
        if phase == 'train':
            model_with_loss.train()
        else:
            model_with_loss.eval()
            torch.cuda.empty_cache()

        results = {}
        data_time, batch_time = AverageMeter(), AverageMeter()
        # Only track the loss components we actually report.
        avg_loss_stats = {
            l: AverageMeter()
            for l in self.loss_stats if l in ('tot', 'hm', 'wh', 'tracking')
        }
        # Negative num_iters means "run the whole loader".
        num_iters = len(
            data_loader
        ) if self.args.num_iters[phase] < 0 else self.args.num_iters[phase]
        bar = Bar('{}'.format("tracking"), max=num_iters)
        end = time.time()
        for iter_id, batch in enumerate(data_loader):
            if iter_id >= num_iters:
                break
            data_time.update(time.time() - end)

            # Move tensors (or lists of tensors) to the target device;
            # file-path strings are skipped.
            for k in batch:
                if k in ('fpath', 'prev_fpath'):
                    continue
                if type(batch[k]) != list:
                    batch[k] = batch[k].to(self.args.device, non_blocking=True)
                else:
                    for i in range(len(batch[k])):
                        batch[k][i] = batch[k][i].to(self.args.device,
                                                     non_blocking=True)

            output, loss, loss_stats = model_with_loss(batch)
            # Mean across GPUs/replicas before backprop.
            loss = loss.mean()
            if phase == 'train':
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model_with_loss.parameters(),
                                               self.args.clip_value)
                self.optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            # Progress text is stored on the Bar *class* attribute, matching
            # the progress library's example usage.
            Bar.suffix = '{phase}: [{0}][{1}/{2}]| '.format(epoch,
                                                            iter_id,
                                                            num_iters,
                                                            phase=phase)
            for l in avg_loss_stats:
                avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                         batch['image'].size(0))
                Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(
                    l, avg_loss_stats[l].avg)
            Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
                                      '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)

            # During validation at configured save points, decode detections
            # and dump per-frame tracking results to text files (rank 0 only).
            if rank == 0 and phase == 'val' and self.args.write_mota_metrics and epoch in self.args.save_point:
                curr_name = None
                tracker = None
                for i in range(self.args.batch_size):
                    try:
                        fpath = batch['fpath'][i]
                    except IndexError:
                        # The last batch may be smaller than batch_size.
                        break
                    fpath = fpath.split('.')[0].split('/')[-1]

                    # Paths are assumed to look like "<seq>_frame_<num>.<ext>"
                    name, num = fpath.split("_frame_")
                    num = int(num)
                    if num % self.args.val_select_frame != 0:
                        continue

                    # A new sequence name starts a fresh tracker.
                    if name != curr_name:
                        curr_name = name
                        tracker = Tracker(self.args)

                    # Slice out this sample's outputs, keeping a batch dim.
                    out = [x[i][None] for x in output]
                    res = out
                    dets = generic_decode(
                        {k: res[t]
                         for (t, k) in enumerate(self.args.heads)},
                        self.args.max_objs, self.args)
                    for k in dets:
                        dets[k] = dets[k].detach().cpu().numpy()

                    if not tracker.init and len(dets) > 0:
                        tracker.init_track(dets)
                    elif len(dets) > 0:
                        tracker.step(dets)

                    with open(os.path.join(self.args.res_dir, fpath + '.txt'),
                              "w") as f:
                        for track in tracker.tracks:
                            x1, y1, x2, y2 = track['bbox']
                            f.write("{} {} {} {} {} {}\n".format(
                                track['score'], track['tracking_id'], x1, y1,
                                x2, y2))
            if rank == 0 and self.args.print_iter > 0:  # If not using progress bar
                if iter_id % self.args.print_iter == 0:
                    print('{}| {}'.format("tracking", Bar.suffix))
            else:
                bar.next()
            # Free references early to reduce GPU memory pressure.
            del output, loss, loss_stats

        if rank == 0 and phase == 'val' and self.args.write_mota_metrics and epoch in self.args.save_point:
            self.compute_map(epoch)

        bar.finish()
        ret = {k: v.avg for k, v in avg_loss_stats.items()}
        ret['time'] = bar.elapsed_td.total_seconds() / 60.
        return ret, results
    def save_samples(self, purpose_hdf5_group: h5py.Group,
                     logdir: pathlib.Path):
        """Render diagnostic plots for a subset of samples in an HDF5 group.

        Every `sample_frequency`-th sample gets: a 2x2 2D figure
        (ground-truth / reconstruction / composition / occlusion), a 1x3 3D
        surface figure, and -- when the corresponding channels exist --
        error/uncertainty, solutions and traversability plots. PDFs are
        written into `logdir`; figures are also shown unless `self.remote`.

        Args:
            purpose_hdf5_group: HDF5 group containing a "data" subgroup with
                ChannelEnum-keyed datasets.
            logdir: output directory for PDF files (created if missing).
        """
        logdir.mkdir(exist_ok=True, parents=True)
        data_hdf5_group = purpose_hdf5_group["data"]
        dataset_length = len(data_hdf5_group[ChannelEnum.REC_DEM.value])
        num_samples = int(dataset_length / self.config["sample_frequency"])

        progress_bar = Bar(f"Plot samples for {str(purpose_hdf5_group.name)}",
                           max=num_samples)
        for sample_idx in range(num_samples):
            idx = sample_idx * self.config["sample_frequency"]
            res_grid = data_hdf5_group[ChannelEnum.RES_GRID.value][idx, ...]
            rel_position = data_hdf5_group[ChannelEnum.REL_POSITION.value][idx,
                                                                           ...]
            rec_dem = data_hdf5_group[ChannelEnum.REC_DEM.value][idx, ...]
            occluded_elevation_map = data_hdf5_group[
                ChannelEnum.OCC_DEM.value][idx, ...]
            comp_dem = data_hdf5_group[ChannelEnum.COMP_DEM.value][idx, ...]

            # Optional channels default to None when absent from the group.
            gt_dem = None
            if ChannelEnum.GT_DEM.value in data_hdf5_group:
                gt_dem = data_hdf5_group[ChannelEnum.GT_DEM.value][idx, ...]

            non_occluded_elevation_map = occluded_elevation_map[
                ~np.isnan(occluded_elevation_map)]

            rec_data_um = None
            if ChannelEnum.REC_DATA_UM.value in data_hdf5_group:
                rec_data_um = data_hdf5_group[ChannelEnum.REC_DATA_UM.value][
                    idx, ...]
            comp_data_um = None
            if ChannelEnum.COMP_DATA_UM.value in data_hdf5_group:
                comp_data_um = data_hdf5_group[ChannelEnum.COMP_DATA_UM.value][
                    idx, ...]
            model_um = None
            if ChannelEnum.MODEL_UM.value in data_hdf5_group:
                model_um = data_hdf5_group[ChannelEnum.MODEL_UM.value][idx,
                                                                       ...]
            total_um = None
            if ChannelEnum.TOTAL_UM.value in data_hdf5_group:
                total_um = data_hdf5_group[ChannelEnum.TOTAL_UM.value][idx,
                                                                       ...]

            rec_dems = None
            if ChannelEnum.REC_DEMS.value in data_hdf5_group:
                rec_dems = data_hdf5_group[ChannelEnum.REC_DEMS.value][idx,
                                                                       ...]
            comp_dems = None
            if ChannelEnum.COMP_DEMS.value in data_hdf5_group:
                comp_dems = data_hdf5_group[ChannelEnum.COMP_DEMS.value][idx,
                                                                         ...]

            # Robot position in pixel coordinates (map center + offset).
            u = int(
                round(occluded_elevation_map.shape[0] / 2 +
                      rel_position[0] / res_grid[0]))
            v = int(
                round(occluded_elevation_map.shape[1] / 2 +
                      rel_position[1] / res_grid[1]))
            # we only visualize the robot position if its inside the elevation map
            plot_robot_position = 0 < u < occluded_elevation_map.shape[
                0] and 0 < v < occluded_elevation_map.shape[1]
            if plot_robot_position:
                robot_position_pixel = np.array([u, v])
            else:
                robot_position_pixel = None
            indiv_vranges = self.config.get("indiv_vranges", True)

            # 2D: with shared ranges, compute a common vmin/vmax over all maps.
            if indiv_vranges is False:
                elevation_vmin = np.min(
                    [np.min(rec_dem),
                     np.min(comp_dem[~np.isnan(comp_dem)])])
                elevation_vmax = np.max(
                    [np.max(rec_dem),
                     np.max(comp_dem[~np.isnan(comp_dem)])])

                if non_occluded_elevation_map.size != 0:
                    elevation_vmin = np.min(
                        [elevation_vmin,
                         np.min(non_occluded_elevation_map)])
                    elevation_vmax = np.max(
                        [elevation_vmax,
                         np.max(non_occluded_elevation_map)])

                # Bug fix: `np.isnan(...).all() is False` compared a numpy
                # bool to the Python singleton and was always False, so the
                # ground truth never widened the shared color range.
                if gt_dem is not None and not np.isnan(gt_dem).all():
                    ground_truth_dem_vmin = np.min(gt_dem[~np.isnan(gt_dem)])
                    ground_truth_dem_vmax = np.max(gt_dem[~np.isnan(gt_dem)])
                    elevation_vmin = np.min(
                        [elevation_vmin, ground_truth_dem_vmin])
                    elevation_vmax = np.max(
                        [elevation_vmax, ground_truth_dem_vmax])
            else:
                # Individual ranges: let matplotlib autoscale each panel.
                elevation_vmin = None
                elevation_vmax = None

            elevation_cmap = plt.get_cmap("viridis")

            fig, axes = plt.subplots(nrows=2, ncols=2, figsize=[12, 10])

            if gt_dem is not None:
                axes[0, 0].set_title("Ground-truth")
                # matshow plots x and y swapped
                mat = axes[0, 0].matshow(np.swapaxes(gt_dem, 0, 1),
                                         vmin=elevation_vmin,
                                         vmax=elevation_vmax,
                                         cmap=elevation_cmap)
                if indiv_vranges:
                    fig.colorbar(mat, ax=axes[0, 0], fraction=0.08)

            axes[0, 1].set_title("Reconstruction")
            # matshow plots x and y swapped
            mat = axes[0, 1].matshow(np.swapaxes(rec_dem, 0, 1),
                                     vmin=elevation_vmin,
                                     vmax=elevation_vmax,
                                     cmap=elevation_cmap)
            if indiv_vranges:
                fig.colorbar(mat, ax=axes[0, 1], fraction=0.08)
            axes[1, 0].set_title("Composition")
            # matshow plots x and y swapped
            mat = axes[1, 0].matshow(np.swapaxes(comp_dem, 0, 1),
                                     vmin=elevation_vmin,
                                     vmax=elevation_vmax,
                                     cmap=elevation_cmap)
            if indiv_vranges:
                fig.colorbar(mat, ax=axes[1, 0], fraction=0.08)
            axes[1, 1].set_title("Occlusion")
            # matshow plots x and y swapped
            mat = axes[1, 1].matshow(np.swapaxes(occluded_elevation_map, 0, 1),
                                     vmin=elevation_vmin,
                                     vmax=elevation_vmax,
                                     cmap=elevation_cmap)
            if indiv_vranges:
                fig.colorbar(mat, ax=axes[1, 1], fraction=0.08)

            if indiv_vranges is False:
                fig.colorbar(mat, ax=axes.ravel().tolist(), fraction=0.045)

            for ax in axes.reshape(-1):
                if plot_robot_position:
                    ax.plot([u], [v], marker="*", color="red")

                # Hide grid lines
                ax.grid(False)

            plt.draw()
            plt.savefig(str(logdir / f"sample_2d_{idx}.pdf"))
            if self.remote is not True:
                plt.show()
            plt.close()

            # 3D
            fig = plt.figure(figsize=[2 * 6.4, 1 * 4.8])
            plt.clf()
            axes = []
            num_cols = 3

            # World-frame coordinate grids centered on the map.
            x_3d = np.arange(
                start=-int(occluded_elevation_map.shape[0] / 2),
                stop=int(occluded_elevation_map.shape[0] / 2)) * res_grid[0]
            y_3d = np.arange(
                start=-int(occluded_elevation_map.shape[1] / 2),
                stop=int(occluded_elevation_map.shape[1] / 2)) * res_grid[1]
            x_3d, y_3d = np.meshgrid(x_3d, y_3d)

            axes.append(
                fig.add_subplot(100 + num_cols * 10 + 1, projection="3d"))
            # the np.NaNs in the occluded elevation maps give us these warnings:
            warnings.filterwarnings("ignore", category=UserWarning)
            if gt_dem is not None:
                axes[0].set_title("Ground-truth")
                axes[0].plot_surface(x_3d,
                                     y_3d,
                                     gt_dem,
                                     vmin=elevation_vmin,
                                     vmax=elevation_vmax,
                                     cmap=elevation_cmap)
            axes.append(
                fig.add_subplot(100 + num_cols * 10 + 2, projection="3d"))
            axes[1].set_title("Reconstruction")
            axes[1].plot_surface(x_3d,
                                 y_3d,
                                 rec_dem,
                                 vmin=elevation_vmin,
                                 vmax=elevation_vmax,
                                 cmap=elevation_cmap)
            axes.append(
                fig.add_subplot(100 + num_cols * 10 + 3, projection="3d"))
            axes[2].set_title("Occlusion")
            axes[2].plot_surface(x_3d,
                                 y_3d,
                                 occluded_elevation_map,
                                 vmin=elevation_vmin,
                                 vmax=elevation_vmax,
                                 cmap=elevation_cmap)
            warnings.filterwarnings("default", category=UserWarning)
            fig.colorbar(mat, ax=axes, fraction=0.015)

            for ax in axes:
                if plot_robot_position:
                    ax.scatter([rel_position[0]], [rel_position[1]],
                               [rel_position[2]],
                               marker="*",
                               color="red")
                ax.set_xlabel("x [m]")
                ax.set_ylabel("y [m]")
                ax.set_zlabel("z [m]")

                # Hide grid lines
                ax.grid(False)

            plt.draw()
            plt.savefig(str(logdir / f"sample_3d_{idx}.pdf"))
            if self.remote is not True:
                plt.show()
            plt.close()

            if gt_dem is not None \
                    or rec_data_um is not None or model_um is not None:
                draw_error_uncertainty_plot(
                    idx,
                    logdir,
                    gt_dem=gt_dem,
                    rec_dem=rec_dem,
                    comp_dem=comp_dem,
                    rec_data_um=rec_data_um,
                    comp_data_um=comp_data_um,
                    model_um=model_um,
                    total_um=total_um,
                    robot_position_pixel=robot_position_pixel,
                    remote=self.remote,
                    indiv_vranges=indiv_vranges)

            if rec_dems is not None:
                draw_solutions_plot(idx,
                                    logdir,
                                    ChannelEnum.REC_DEMS,
                                    rec_dems,
                                    robot_position_pixel=robot_position_pixel,
                                    remote=self.remote)

            if comp_dems is not None:
                # Bug fix: the original passed `rec_dems` here (copy-paste),
                # so the COMP_DEMS figure showed reconstruction solutions.
                draw_solutions_plot(idx,
                                    logdir,
                                    ChannelEnum.COMP_DEMS,
                                    comp_dems,
                                    robot_position_pixel=robot_position_pixel,
                                    remote=self.remote)

            if ChannelEnum.REC_TRAV_RISK_MAP.value in data_hdf5_group \
                    and ChannelEnum.COMP_TRAV_RISK_MAP.value in data_hdf5_group:
                rec_trav_risk_map = data_hdf5_group[
                    ChannelEnum.REC_TRAV_RISK_MAP.value][idx, ...]
                comp_trav_risk_map = data_hdf5_group[
                    ChannelEnum.COMP_TRAV_RISK_MAP.value][idx, ...]
                draw_traversability_plot(
                    idx,
                    logdir,
                    gt_dem=gt_dem,
                    rec_dem=rec_dem,
                    comp_dem=comp_dem,
                    rec_data_um=rec_data_um,
                    comp_data_um=comp_data_um,
                    model_um=model_um,
                    total_um=total_um,
                    rec_trav_risk_map=rec_trav_risk_map,
                    comp_trav_risk_map=comp_trav_risk_map,
                    robot_position_pixel=robot_position_pixel,
                    remote=self.remote)

            progress_bar.next()
        progress_bar.finish()
Exemple #53
0
def _spin_bar():
    """Render the short fake 'Processing' animation (20 ticks, ~3 s)."""
    bar = Bar('Processing', max=20, suffix='%(percent)d%%')
    for _ in range(20):
        time.sleep(.15)
        bar.next()
    bar.finish()


def _play_again():
    """Prompt the user to replay; return True iff they answered 'yes'."""
    answer = (input(
        "Play again?\n Enter 'Yes' to continue  or...\n 'No' to exit game "
    )).upper()
    return answer == "YES"


def magic_eight_ball():
    """Interactive magic-eight-ball / fortune-cookie game.

    Reads a question from stdin and prints a canned response:
      * 'Q'  -> quit immediately, returning a goodbye string.
      * 'F'  -> "crack a fortune cookie": print a random response.
      * a question shorter than 10 characters -> rejected as invalid input.
      * anything else -> print a random response.
    After each round (except 'Q') the user may replay.

    Returns:
        str: a goodbye message once the user stops playing.

    Fixes vs. the previous version:
      * The recursive replay call's return value was discarded, so the
        replay path returned None; it is now propagated with `return`.
      * The three duplicated progress-bar / replay branches are collapsed
        into the `_spin_bar` / `_play_again` helpers.
      * Typo in a response string ("probaby" -> "probably").
    """
    responses = [
        "There is never enough time in the morning. Try to combine brushing your teeth with your breakfast.",
        "A sticking plaster can heal any wound. You just have to believe.",
        "Floss. It's more important than you would think.",
        "You should probably drink more water.",
        "You should consider buying a plunger before you need a plunger",
        "You know what you should probably earn more than you show, speak less than you know.",
        "Hahahahaha",
        "Once a week, take a bath in Epsom Salts, and if you can, add half cup of baking soda and some essential oil such as lavender.",
        "When exercising, count backwards. For example, if you are carrying out 20 sit ups, don’t count from 1 to 20, start at 20 and count backwards as you do them.",
        "Start listening to your gut instinct. It’s always right",
        "Never give anyone more than 2 chances.",
        "Wear sunscreen, even if you think you don't need it",
        "If you can do something in less than 5 minutes. Do it now.",
        "Always strive to stand and sit with good posture.", "Just have fun",
        "To be Idle is to be foolish",
        "You might want to run, but you should stay and fight.",
        "Face the truth with dignity", "Travel is in your future",
        "Don't wait for success to come - go find it!"
    ]
    question = (input(
        "Hi,Enter your question\n or..\n Enter F to crack your fortune cookie \nEnter Q to quit game "
    )).upper()

    if question == "Q":
        return "Come back again soon"

    # Every non-quit branch showed the same progress animation first.
    _spin_bar()

    # 'F' is checked before the length test, as before, so a bare 'F'
    # (length 1) still cracks a cookie instead of being "invalid".
    if question == "F" or len(question) >= 10:
        print(random.choice(responses))
    else:
        print("Invalid input")

    if _play_again():
        # Bug fix: propagate the recursive result instead of dropping it.
        return magic_eight_ball()
    return "come back again"
Exemple #54
0
def verify_batch_consumer_performance():
    """ Verify batch Consumer performance """

    config = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': uuid.uuid1(),
        'session.timeout.ms': 6000,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest'
    }

    consumer = confluent_kafka.Consumer(config)

    def _log_assign(cons, partitions):
        # Rebalance callback: report and accept the assigned partitions.
        print('on_assign:', len(partitions), 'partitions:')
        for part in partitions:
            print(' %s [%d] @ %d' % (part.topic, part.partition, part.offset))
        cons.assign(partitions)

    def _log_revoke(cons, partitions):
        # Rebalance callback: report and drop the revoked partitions.
        print('on_revoke:', len(partitions), 'partitions:')
        for part in partitions:
            print(' %s [%d] @ %d' % (part.topic, part.partition, part.offset))
        cons.unassign()

    consumer.subscribe([topic], on_assign=_log_assign, on_revoke=_log_revoke)

    max_msgcnt = 1000000
    batch_size = 1000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    bar = None
    if with_progress:
        bar = Bar('Consuming',
                  max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')

    # Pull batches of up to batch_size messages until max_msgcnt is reached.
    while msgcnt < max_msgcnt:
        for msg in consumer.consume(num_messages=batch_size, timeout=20.0):
            if msg.error():
                raise confluent_kafka.KafkaException(msg.error())

            bytecnt += len(msg)
            msgcnt += 1

            if msgcnt == 1:
                # Throughput is measured from the first message received.
                t_first_msg = time.time()

            if bar is not None and (msgcnt % 10000) == 0:
                bar.next(n=10000)

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    consumer.close()
Exemple #55
0
    # NOTE(review): this excerpt begins mid-scope — `csvfile`, `NUM_DOCS`,
    # `sentences`, `cleaner` and `start` must be defined by enclosing code
    # that was lost when this snippet was extracted (likely a `with open(...)
    # as csvfile:` block — TODO confirm against the original file).
    # Start progress bar. max obtained from reading in the excel file and checking number of rows
    indexing_progress_bar = Bar("Reading in documents to train Word2Vec Model",
                                max=NUM_DOCS)

    # Read in CSV dataset and remove headers from consideration
    csv_reader = csv.reader(csvfile)
    next(csv_reader, None)

    # Iterate over each row, and each row represents a document
    for row in csv_reader:
        # append title, content and court for training
        # (presumably columns 1, 2 and 4 of the CSV — verify the schema)
        data = row[1] + row[2] + row[4]
        sentences.append(cleaner.clean(data))
        # Update progress bar
        indexing_progress_bar.next()

    #End time
    end = time.time()

    #Time taken
    print(f"Time taken to index is {(end-start):.2f}s")

# NOTE(review): the code dedents here, so the statements below run after the
# enclosing block above has finished; `indexing_progress_bar` and `sentences`
# are still reachable only if the block above was a `with`, not a `def`.
train_start = time.time()
# Progress bar finish
indexing_progress_bar.finish()
print("Starting training...")

model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

train_end = time.time()
Exemple #56
0
def traintriandcls(mymodel, epoch, cuda_gpu, optimizer, mytraindata, scheduler):
    """Train one epoch with a class-based triplet loss and checkpoint the model.

    Args:
        mymodel: model whose forward returns a dict with 'out' and 'feature'.
        epoch: zero-based epoch index (printed/saved as epoch + 1).
        cuda_gpu: truthy to move batches to the GPU.
        optimizer: optimizer stepping `mymodel`'s parameters.
        mytraindata: dataset exposing `create_triplet_classbased_1` and
            yielding ((img1, label1), (img2, label2), (img3, label3)) triplets.
        scheduler: LR scheduler, only queried for logging and checkpointing.

    Side effects: updates the global `min_loss` and writes a checkpoint
    (best-so-far flagged) next to `args.train_dir`.
    """
    global min_loss
    batch_timer = AverageMeter()
    data_timer = AverageMeter()
    prec_losses = AverageMeter()
    print('epoch {}'.format(epoch + 1))

    trainloss = 0.
    record = 0  # count of batches contributing a positive loss (diagnostic)
    # Re-mine triplets for this epoch based on the current model state.
    mytraindata.create_triplet_classbased_1(mymodel, args)

    mytrainloader = torch.utils.data.DataLoader(mytraindata, batch_size=args.batch_size, shuffle=True, num_workers=50)

    mymodel.train()

    # Hoisted out of the batch loop: the loss module is loop-invariant,
    # so there is no need to rebuild it for every batch.
    if args.balancedgpu:
        tloss = DataParallelCriterion(buildLoss.TripletLoss())
    else:
        tloss = buildLoss.TripletLoss()

    bar = Bar('[{}]{}'.format('base-GGLM', 'train'), max=len(mytrainloader))
    since = time.time()

    for index, ((img1, label1), (img2, label2), (img3, label3)) in enumerate(mytrainloader):
        data_timer.update(time.time() - since)
        if cuda_gpu:
            img1 = img1.cuda()
            label1 = label1.cuda()
            img2 = img2.cuda()
            label2 = label2.cuda()
            img3 = img3.cuda()
            label3 = label3.cuda()
        img1 = img1.float()
        img2 = img2.float()
        img3 = img3.float()
        img1, img2, img3 = Variable(img1), Variable(img2), Variable(img3)

        optimizer.zero_grad()
        try:
            o1 = mymodel(img1)
            o2 = mymodel(img2)
            o3 = mymodel(img3)

            f1 = o1['feature']
            f2 = o2['feature']
            f3 = o3['feature']

            loss = tloss(f1, f2, f3)

            if loss.item() > 0:
                trainloss += loss.item()
                record += 1
            loss.backward()
            optimizer.step()
            batch_timer.update(time.time() - since)
            since = time.time()
            # Bug fix: store the scalar, not the tensor — keeping the tensor
            # in the meter retained every batch's autograd graph in memory.
            prec_losses.update(loss.item(), 1)
            log_msg = ('\n[epoch:{epoch}][iter:({batch}/{size})]' +
                       '[lr:{lr}] loss: {loss:.4f}| eta: ' +
                       '(data:{dt:.3f}s),(batch:{bt:.3f}s),(total:{tt:})') \
                .format(
                epoch=epoch + 1,
                batch=index + 1,
                size=len(mytrainloader),
                lr=scheduler.get_lr()[0],
                loss=prec_losses.avg,
                dt=data_timer.val,
                bt=batch_timer.val,
                tt=bar.elapsed_td)
            print(log_msg)

        except Exception as e:
            # Deliberate best-effort: a single failing batch (e.g. a corrupt
            # sample) must not abort the whole epoch. Log and move on.
            print(e)
            continue
        bar.next()
    bar.finish()

    pklword = args.train_dir.split('/')[-1]
    newpkl = 'parameter_%02d.pkl' % (epoch + 1)
    path = args.train_dir.replace(pklword, newpkl)

    is_best = trainloss < min_loss
    if is_best:
        min_loss = trainloss
    save_checkpoint({'epoch': epoch,
                     'model_state_dict': mymodel.state_dict(),
                     'optimizer_state_dict': optimizer.state_dict(),
                     'loss': trainloss,
                     'scheduer': scheduler
                     }, is_best, path)
Exemple #57
0
def fuzz_websockets(ws_address, init_messages, original_messages,
                    session_active_message, ignore_tokens, ignore_errors,
                    output, http_proxy_host, http_proxy_port):
    """
    Creates a websocket connection, sends the payloads, writes output to disk.

    :param ws_address: The websocket address to connect and send messages to

    :param init_messages: The login messages to send before any payloads.
                          This parameter accepts a serialized message (a string) or a
                          function that will generate the string when called.

    :param session_active_message: Wait for this message after sending the init_messages. Usually
                                   This is the message that says: "Login successful". Use None if
                                   there are no messages to wait for.

    :param original_messages: The original messages to be fuzzed
                              This parameter accepts a serialized message (a string) or a
                              function that will generate the string when called.

    :param ignore_tokens: When generating messages with payloads, do not replace these parts
                          of the message. In general you want to set this list to all the
                          keys in the json objects. For example, if the json object looks like
                          {"foo": "bar"} , and you only want to fuzz the "bar" part of the message
                          set ignore_tokens to ["foo"]

    :param ignore_errors: Ignore these errors when they are returned by the application

    :param output: Save all messages here

    :param http_proxy_host: The HTTP host (None if proxy shouldn't be used)
    :param http_proxy_port: The HTTP proxy (None if proxy shouldn't be used)

    :return: None
    """
    logging.info('Starting the fuzzing process...')

    # Bug fix: `file()` is a Python 2-only builtin (NameError on Python 3),
    # and the handle was never closed. `open()` works on both versions and
    # the context manager guarantees the descriptor is released.
    with open(PAYLOADS) as payload_fh:
        payload_count = sum(1 for _ in payload_fh)

    with ThreadPoolExecutorWithQueueSizeLimit(max_workers=25) as ex:

        for original_message in original_messages:

            # TODO: Not sure if this is the best place to call the original_message
            #       function, but I need to get the message string to be able to
            #       tokenize it and fuzz it...
            original_message = serialize_message(original_message)

            logging.info('Fuzzing message: %s' % original_message)
            tokenized_messages = create_tokenized_messages(
                original_message, ignore_tokens)

            bar = Bar('Processing',
                      max=len(tokenized_messages) * payload_count)

            for tokenized_count, tokenized_message in enumerate(
                    tokenized_messages):

                # Re-read the payload list per tokenized message, as before,
                # but through a context manager so each pass closes the file.
                with open(PAYLOADS) as payload_fh:
                    for payload in payload_fh:

                        bar.next()

                        # You might want to modify this if the message is not JSON
                        modified_message = replace_token_in_json(
                            payload, tokenized_message)

                        logging.debug('Generated fuzzed message: %s' %
                                      modified_message)

                        # Copy so each task gets its own message list.
                        messages_to_send = init_messages[:]
                        messages_to_send.append(modified_message)

                        ex.submit(send_payloads_in_websocket, ws_address,
                                  messages_to_send, session_active_message,
                                  ignore_errors, tokenized_count, output,
                                  http_proxy_host, http_proxy_port)

            bar.finish()

    logging.debug('Finished fuzzing process')
Exemple #58
0
def verify_producer_performance(with_dr_cb: bool = True) -> None:
    """ Time how long it takes to produce and delivery X messages

    Produces 1M fixed-size messages to `topic`, retrying on local-queue
    backpressure, then flushes and reports produce and delivery throughput.

    :param with_dr_cb: when True, attach the MyTestDr delivery callback so
                       per-message delivery stats are real; when False the
                       delivery numbers are faked from the produce counts.
    """
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'linger.ms': 500,  # batch up to 500ms of messages per request
        'error_cb': error_cb
    }

    p = confluent_kafka.Producer(conf)

    msgcnt = 1000000
    msgsize = 100
    msg_pattern = 'test.py performance'
    # Repeat the pattern and truncate to exactly msgsize bytes.
    msg_payload = (msg_pattern * int(msgsize / len(msg_pattern)))[0:msgsize]

    dr = MyTestDr(silent=True)

    t_produce_start = time.time()
    msgs_produced = 0
    msgs_backpressure = 0
    print('# producing %d messages to topic %s' % (msgcnt, topic))

    if with_progress:
        bar = Bar('Producing', max=msgcnt)
    else:
        bar = None

    for i in range(0, msgcnt):
        # Retry each message until it is accepted by the local queue.
        while True:
            try:
                if with_dr_cb:
                    p.produce(topic, value=msg_payload, callback=dr.delivery)
                else:
                    p.produce(topic, value=msg_payload)
                break
            except BufferError:
                # Local queue is full (slow broker connection?)
                msgs_backpressure += 1
                if bar is not None and (msgs_backpressure % 1000) == 0:
                    bar.next(n=0)  # n=0: refresh the bar without advancing
                p.poll(100)  # serve delivery callbacks to drain the queue
            # NOTE(review): this `continue` is redundant — `while True`
            # loops anyway after the except block. Harmless but misleading.
            continue

        if bar is not None and (msgs_produced % 5000) == 0:
            bar.next(n=5000)
        msgs_produced += 1
        p.poll(0)  # non-blocking: serve any pending delivery callbacks

    t_produce_spent = time.time() - t_produce_start

    bytecnt = msgs_produced * msgsize

    if bar is not None:
        bar.finish()

    print('# producing %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' %
          (msgs_produced, bytecnt /
           (1024 * 1024), t_produce_spent, msgs_produced / t_produce_spent,
           (bytecnt / t_produce_spent) / (1024 * 1024)))
    print(
        '# %d temporary produce() failures due to backpressure (local queue full)'
        % msgs_backpressure)

    print('waiting for %d/%d deliveries' % (len(p), msgs_produced))
    # Wait for deliveries
    p.flush()
    t_delivery_spent = time.time() - t_produce_start

    # NOTE(review): this repeats the produce-stats line printed above,
    # apparently so the figures sit next to the delivery stats — confirm
    # the duplication is intentional before removing it.
    print('# producing %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' %
          (msgs_produced, bytecnt /
           (1024 * 1024), t_produce_spent, msgs_produced / t_produce_spent,
           (bytecnt / t_produce_spent) / (1024 * 1024)))

    # Fake numbers if not using a dr_cb
    if not with_dr_cb:
        print('# not using dr_cb')
        dr.msgs_delivered = msgs_produced
        dr.bytes_delivered = bytecnt

    print(
        '# delivering %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' %
        (dr.msgs_delivered, dr.bytes_delivered /
         (1024 * 1024), t_delivery_spent, dr.msgs_delivered / t_delivery_spent,
         (dr.bytes_delivered / t_delivery_spent) / (1024 * 1024)))
    print('# post-produce delivery wait took %.3fs' %
          (t_delivery_spent - t_produce_spent))
Exemple #59
0
    def run_epoch(self, phase, epoch, data_loader):
        """Run one epoch over `data_loader` in either 'train' or eval mode.

        Args:
            phase: 'train' enables gradient updates; anything else runs the
                model in eval mode with no optimizer steps.
            epoch: epoch number, used only for the progress-bar label.
            data_loader: iterable of batch dicts; every non-'meta' entry is
                moved to `opt.device`.

        Returns:
            (ret, results): `ret` maps each loss-stat name to its epoch
            average plus 'time' (elapsed minutes); `results` is filled by
            `save_result` only when `opt.test` is set.
        """
        model_with_loss = self.model_with_loss
        if phase == 'train':
            model_with_loss.train()
        else:
            # Multi-GPU eval: unwrap the DataParallel-style `.module` so a
            # single replica is used, then clear cached GPU memory.
            if len(self.opt.gpus) > 1:
                model_with_loss = self.model_with_loss.module
            model_with_loss.eval()
            torch.cuda.empty_cache()

        opt = self.opt
        results = {}
        data_time, batch_time = AverageMeter(), AverageMeter()
        avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
        # opt.num_iters < 0 means "use the full loader"; otherwise truncate.
        num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
        bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
        end = time.time()
        for iter_id, batch in enumerate(data_loader):
            if iter_id >= num_iters:
                break
            data_time.update(time.time() - end)

            # Move every tensor entry to the target device; 'meta' stays on CPU.
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=opt.device,
                                           non_blocking=True)

            output, loss, loss_stats = model_with_loss(batch)
            # .mean() collapses per-GPU losses when running data-parallel.
            loss = loss.mean()
            if phase == 'train':
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            batch_time.update(time.time() - end)
            end = time.time()

            # NOTE: Bar.suffix is a *class* attribute here — the suffix text
            # is rebuilt from scratch each iteration, then appended to below.
            Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
                epoch,
                iter_id,
                num_iters,
                phase=phase,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            for l in avg_loss_stats:
                avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                         batch['input'].size(0))
                Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(
                    l, avg_loss_stats[l].avg)
            if not opt.hide_data_time:
                Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
                  '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
            # Either print a log line every print_iter iterations, or let the
            # progress bar redraw itself — never both.
            if opt.print_iter > 0:
                if iter_id % opt.print_iter == 0:
                    print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
            else:
                bar.next()

            if opt.test:
                self.save_result(output, batch, results)
            # Free per-batch tensors eagerly to keep GPU memory flat.
            del output, loss, loss_stats, batch

        bar.finish()
        ret = {k: v.avg for k, v in avg_loss_stats.items()}
        ret['time'] = bar.elapsed_td.total_seconds() / 60.
        return ret, results
Exemple #60
0
def eval(net, test_loader, device):
    '''
    Evaluate a segmentation network on a test set and plot ROC / PR curves.

    NOTE(review): the name shadows the builtin `eval`; kept for caller
    compatibility, but consider renaming (e.g. `evaluate`) project-wide.

    Parameter:
        net: model after loading weights; forward returns a sigmoid
             probability map plus four side outputs (d1..d4).
        test_loader: DataLoader yielding (data, label) batches.
        device: gpu or cpu?

    Returns:
        (f1_avg, loss_avg): average F1 score and average validation loss.

    Side effects: writes "ROC.png" and "Precision_Recall.png" to the
    current directory when draw_curve is enabled.
    '''
    net.eval().to(device)

    bar = Bar('Processing validate', max=len(test_loader))
    loss_ = AverageMeter()
    acc_ = AverageMeter()
    se_ = AverageMeter()
    sp_ = AverageMeter()
    auc_ = AverageMeter()
    f1_ = AverageMeter()

    draw_curve = True

    preds_prob_list = []
    preds_list = []
    gts_list = []

    with torch.no_grad():
        for i, (data, label) in enumerate(test_loader):
            data = data.to(device)
            label = label.to(device)

            pred_prob, d1_probs, d2_probs, d3_probs, d4_probs = net(
                data)  # after sigmoid function

            # Only the fused output contributes to the reported loss; the
            # side-output terms below were disabled deliberately.
            validate_loss = calc_loss(pred_prob, label, bce_weight=.5)
            # validate_loss += calc_loss(d2_probs, label, bce_weight=.5)
            # validate_loss += calc_loss(d3_probs, label, bce_weight=.5)
            # validate_loss += calc_loss(d4_probs, label, bce_weight=.5)
            loss_.update(validate_loss.item(), data.size(0))

            # Binarize at the 0.5 probability threshold.
            preds = torch.gt(pred_prob, .5).float()

            # Convert to numpy format (channel 0 only)
            preds = preds.cpu().data.numpy()[:, 0]
            label = label.cpu().data.numpy()[:, 0]
            pred_prob = pred_prob.cpu().data.numpy()[:, 0]

            pred_prob = pred_prob.reshape([-1])
            gt = label.reshape([-1])
            preds = preds.reshape([-1])
            # NOTE(review): sklearn's confusion_matrix signature is
            # (y_true, y_pred); here predictions are passed first. The
            # downstream calculate_Accuracy may expect this orientation —
            # confirm before swapping.
            CM = confusion_matrix(preds, gt)
            F1, Acc, Se, Sp, _ = calculate_Accuracy(CM)
            Auc = roc_auc_score(gt, pred_prob)

            if draw_curve:
                preds_prob_list.append(pred_prob)
                preds_list.append(preds)
                gts_list.append(gt)

            acc_.update(Acc, data.size(0))
            se_.update(Se, data.size(0))
            sp_.update(Sp, data.size(0))
            auc_.update(Auc, data.size(0))
            f1_.update(F1, data.size(0))

            bar.suffix = '{batch}/{size}) | Loss: {loss:.3f}'.format(
                batch=i + 1, size=len(test_loader), loss=loss_.avg)
            bar.next()
        bar.finish()

    print('Acc: %s  |  F1: %s |  Se: %s |  Sp: %s |  Auc: %s' % (str(
        acc_.avg), str(f1_.avg), str(se_.avg), str(sp_.avg), str(auc_.avg)))

    if draw_curve:
        # https://github.com/RanSuLab/DUNet-retinal-vessel-detection
        _preds_prob = np.asarray(preds_prob_list).reshape(-1)
        _preds = np.asarray(preds_list).reshape(-1)
        _gts = np.asarray(gts_list).reshape(-1)

        # Area under the ROC curve
        fpr, tpr, thresholds = roc_curve(_gts, _preds_prob)
        auc_roc = roc_auc_score(_gts, _preds_prob)
        plt.figure()
        # plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.plot(fpr, tpr, 'darkorange', label='(AUC = %0.4f)' % auc_roc)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.title('ROC Curve', fontsize=14)
        plt.xlabel("FPR (False Positive Rate)", fontsize=14)
        plt.ylabel("TPR (True Positive Rate)", fontsize=14)
        plt.legend(loc="lower right")
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        # skplt.metrics.plot_roc(_gts, _preds_prob)
        plt.savefig("ROC.png")
        # Bug fix: close the figure — repeated eval() calls leaked one
        # live matplotlib figure per plot.
        plt.close()

        # Precision-recall curve
        precision, recall, thresholds = precision_recall_curve(
            _gts, _preds_prob)
        precision = np.fliplr([
            precision
        ])[0]  # so the array is increasing (you won't get negative AUC)
        recall = np.fliplr([
            recall
        ])[0]  # so the array is increasing (you won't get negative AUC)
        auc_prec_rec = np.trapz(precision, recall)
        plt.figure()
        plt.plot(recall,
                 precision,
                 'darkorange',
                 label='Area Under the Curve (AUC = %0.4f)' % auc_prec_rec)
        plt.title('Precision - Recall curve', fontsize=14)
        plt.xlabel("Recall", fontsize=14)
        plt.ylabel("Precision", fontsize=14)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.legend(loc="lower right")
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.savefig("Precision_Recall.png")
        # Bug fix: release the second figure as well.
        plt.close()

    # according to f1 score
    return f1_.avg, loss_.avg