Ejemplo n.º 1
0
def torcURL(address, filename):
	"""Fetch ``address`` through the SOCKS5 proxy at 127.0.0.1:SOCKS_PORT.

	A 'Running' progress bar is advanced 100 times before the transfer, as
	it was before; only one curl handle is created now (the previous code
	built a fresh handle on every tick and never closed any of them).

	:param address: URL handed to pycurl.
	:param filename: only used in the banner message. Nothing is written to
		it: the statements that wrote the file sat after the ``return`` and
		could never run, so they have been dropped.
	:return: the response body as bytes; the string
		"Unable to reach <address> (<error>)" on ``pycurl.error``; ``None``
		after ``UnknownError()`` has been called for any other exception.
	:raises KeyboardInterrupt: re-raised unchanged.
	"""
	print('cURL on ' + address + ' to ' + filename + '\n')
	bar = Bar('Running', max=100)
	for _ in range(100):
		bar.next()
	bar.finish()

	output = io.BytesIO()
	handle = pycurl.Curl()
	try:
		# Option errors propagate to the caller, exactly as they did when
		# the options were set outside the try block.
		handle.setopt(pycurl.URL, address)
		handle.setopt(pycurl.PROXY, '127.0.0.1')
		handle.setopt(pycurl.PROXYPORT, SOCKS_PORT)
		handle.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME)
		handle.setopt(pycurl.WRITEFUNCTION, output.write)
		try:
			handle.perform()
			return output.getvalue()
		except KeyboardInterrupt:
			# bare raise keeps the original traceback
			raise
		except pycurl.error as e:
			return "Unable to reach %s (%s)" % (address, e)
		except Exception:
			UnknownError()
	finally:
		# release the libcurl handle on every path
		handle.close()
Ejemplo n.º 2
0
def evaluate(train_file_path, test_num, tagger, output_file_path):
    """Train ``tagger`` on the head of the corpus and score it on the tail.

    The tagged test sentences are written to ``output_file_path``, one
    sentence per line, as tab-separated ``word_tag`` items.

    :param train_file_path: path handed to ``parse_train_data``.
    :param test_num: number of sentences held out for testing.
    :param tagger: object providing ``train(sentences)`` and ``tag(sentence)``.
    :param output_file_path: file that receives the tagged test sentences.
    :return: percentage (0-100) of correct tags as counted by
        ``evaluate_sentence``; 0.0 when no token was tagged at all.
    """
    sents = parse_train_data(train_file_path)
    test_start = len(sents) - test_num - 1
    # NOTE(review): the last sentence is excluded from both slices and
    # sents[test_start] ends up in train AND test data -- confirm whether
    # this split is intended before changing it.
    test_data = sents[test_start:len(sents) - 1]
    train_data = sents[:test_start + 1]
    print('Training with {0} sentences'.format(len(train_data)))
    tagger.train(train_data)

    correct = 0
    total = 0
    bar = Bar('Testing with {0} sentences'.format(len(test_data)), max=len(test_data))
    # The context manager closes the output file even if tagging fails.
    with open(output_file_path, 'w') as output:
        for s in test_data:
            tagged = tagger.tag(remove_tags(s))
            # evaluate
            correct += evaluate_sentence(s, tagged)
            total += len(tagged)
            # write
            output.write('\t'.join(t[0] + '_' + t[1] for t in tagged) + '\n')
            bar.next()
    bar.finish()

    if not total:
        # Nothing was tagged: avoid dividing by zero.
        return 0.0
    return correct / float(total) * 100
Ejemplo n.º 3
0
 def parse(self, dataset):
     """
     Run ``self.nlp`` over every sentence of every part in ``dataset`` and
     store the per-token results on the matching tokens of the part.

     After the token features are set, ``self._dependency_path`` is called
     for each parsed token, and the part-level post-processing hooks are run.
     If ``self.constituency_parser`` equals True, ``self.parser`` parses the
     dataset afterwards.

     :type dataset: nala.structures.data.Dataset
     """
     part_total = len(list(dataset.parts()))
     progress = Bar('Processing [SpaCy]', max=part_total)

     for part in dataset.parts():
         sentence_strings = part.get_sentence_string_array()
         for sent_no, text in enumerate(sentence_strings):
             parsed = self.nlp(text)

             # copy the parser output onto the part's own token objects
             for parsed_tok in parsed:
                 own_tok = part.sentences[sent_no][parsed_tok.i]
                 own_tok.features = {
                     'id': parsed_tok.i,
                     'pos': parsed_tok.tag_,
                     'dep': parsed_tok.dep_,
                     'lemma': parsed_tok.lemma_,
                     'prob': parsed_tok.prob,
                     'is_punct': parsed_tok.is_punct,
                     'is_stop': parsed_tok.is_stop,
                     'cluster': parsed_tok.cluster,
                     'dependency_from': None,
                     'dependency_to': [],
                     'is_root': False,
                 }
                 part.tokens.append(own_tok)

             # dependency paths need all features of the sentence in place
             for parsed_tok in parsed:
                 self._dependency_path(parsed_tok, sent_no, part)

         part.percolate_tokens_to_entities()
         part.calculate_token_scores()
         part.set_head_tokens()
         progress.next()
     progress.finish()

     if self.constituency_parser == True:
         self.parser.parse(dataset)
def hough(im, ntx=460, mry=360):
    """Build a Hough accumulator image for ``im`` (Python 2 code).

    Every pixel of ``im`` whose value is not 255 lowers, for each of the
    ``ntx`` angle columns, one cell of a white ``ntx`` x ``mry`` "L" image.

    :param im: image object providing ``load()`` and ``size``.
    :param ntx: number of angle steps (width of the result).
    :param mry: number of distance steps (height of the result); rounded
        down to an even number.
    :return: the accumulator image.
    """
    source_px = im.load()
    width, height = im.size
    mry = int(mry/2)*2
    accumulator = Image.new("L", (ntx, mry), 255)
    acc_px = accumulator.load()

    half_rows = mry // 2  # mry is an int here, so this equals Python 2's mry/2
    rmax = hypot(width, height)
    dr = rmax / half_rows
    dth = pi / ntx
    # cos/sin of every angle step, computed once instead of per pixel
    trig = [(cos(dth * step), sin(dth * step)) for step in xrange(ntx)]

    bar = Bar('Processing', max=width)
    for x in xrange(width):
        for y in xrange(height):
            if source_px[x, y] == 255:
                continue
            for step in xrange(ntx):
                cos_th, sin_th = trig[step]
                r = x*cos_th + y*sin_th
                row = half_rows + int(r/dr+0.5)
                try:
                    acc_px[step, row] -= 1
                except:
                    print('error')
        bar.next()
    del bar
    return accumulator
Ejemplo n.º 5
0
def read_and_gen(lyric_path,file_path):
    """
    Read a lyric file line by line and generate an mp3 sound file from it.

    A line is saved with lang='en' when ``is_alphabet`` accepts its first
    character and with lang='zh' when ``is_chinese`` accepts it.

    :param lyric_path: UTF-8 text file to read
    :param file_path: output file; deleted first if it already exists
    :return: None
    """

    # start from a clean output file, new audio is added to it sequentially
    if os.path.exists(file_path):
        os.remove(file_path)

    with open(lyric_path, encoding="utf-8") as handle:
        lines = handle.readlines()

    bar = Bar('Processing', max=len(lines))
    for line in lines:
        first_char = line[0]
        if is_alphabet(first_char):
            # line should be spoken in en
            gtts_extends(line,lang='en').sequence_save(file_path)

        if is_chinese(first_char):
            # line should be spoken in zh
            gtts_extends(line, lang='zh').sequence_save(file_path)
        bar.next()
    bar.finish()
    print("transform success!")
Ejemplo n.º 6
0
def get_list(filename):
	"""
	Creates an array of objects out of
	input training file
	==================================
	Each line is stripped of its newline, split on tabs and wrapped in a
	``files`` object, on which word count (5), POS features, punctuation
	features and vectors are set.
	==================================
	Returns:
		* array of objects where each
		object corresponds to a document
	==================================
	"""

	# the context manager closes the file even if reading fails
	with open(filename) as fo:
		lines = fo.readlines()

	obj_arr = []
	bar = Bar("Processing", max=len(lines), suffix='%(percent)d%% | %(index)d of %(max)d | %(eta)d seconds remaining.')
	for each in lines:
		send_obj = files(each.split('\n')[0].split('\t'))
		send_obj.set_word_count(5)
		send_obj.set_pos_features()
		send_obj.set_punctuation_features()
		send_obj.set_vectors()
		obj_arr.append(send_obj)
		bar.next()
	bar.finish()
	return obj_arr
Ejemplo n.º 7
0
 def set_image_objects(self):
     """Query each Landsat sensor family and collect all returned features.

     One request is issued per sensor query through ``self._query``, using
     ``self.bounding_box["geometry"]`` as an envelope filter and ordering by
     ``dayOfYear``. Each request gets its own progress bar, sized by the
     number of features in that response.

     :return: list with the ``"features"`` entries of all four responses,
         in query order (landsat8, landsat7, landsat4_5, landsat1_5).
     """
     landsat8 = "(acquisitionDate >= date'2013-01-01' AND acquisitionDate <= date'2016-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'OLI') AND (cloudCover <= 20)"
     landsat7 = "(acquisitionDate >= date'2003-01-01' AND acquisitionDate <= date'2016-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'ETM_SLC_OFF') AND (cloudCover <= 20)"
     landsat4_5 = "(acquisitionDate >= date'1982-01-01' AND acquisitionDate <= date'2011-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'TM') AND (cloudCover <= 20)"
     landsat1_5 = "(acquisitionDate >= date'1972-01-01' AND acquisitionDate <= date'2013-12-31') AND (dayOfYear >=1 AND dayOfYear <= 366) AND (sensor = 'MSS') AND (cloudCover <= 20)"
     # name and where-clause kept together so they cannot drift apart
     queries = [
         ("landsat8", landsat8),
         ("landsat7", landsat7),
         ("landsat4_5", landsat4_5),
         ("landsat1_5", landsat1_5),
     ]
     obj = []
     for name, where in queries:
         parms = {
             "f":"json",
             "where":where,
             "geometry":self.bounding_box["geometry"],
             "returnGeometry":"false",
             "spatialRel":"esriSpatialRelIntersects",
             "geometryType":"esriGeometryEnvelope",
             "inSR":self.bounding_box["geometry"]["spatialReference"]["wkid"],
             "outSR":self.bounding_box["geometry"]["spatialReference"]["wkid"],
             "outFields":"*",
             "orderByFields":"dayOfYear"
         }
         features = self._query(parms)["features"]
         # The bar counts features, so its maximum must be the feature
         # count (it used to be the number of queries).
         bar = Bar("Requesting data: " + name, max=len(features))
         for feature in features:
             obj.append(feature)
             bar.next()
         bar.finish()
     return obj
Ejemplo n.º 8
0
def tokenize_proteins(data, msg='Processing proteins'):
    """Split pose lines into actives (grouped per ligand) and decoys.

    Each line is parsed with ``posedict``. A pose whose ``label`` equals 1
    receives an ``id`` of the form ``<ligand>-<line index>`` and is appended
    to the list stored under its ligand; every other pose is a decoy.

    args:
        @data list of string lines containing pose data
        @msg string message to display in progress bar
    returns:
        @actives OrderedDict mapping ligand -> list of active poses
        @decoys list of all decoy poses gathered from data
    """

    decoys = []
    actives = OrderedDict()
    progress = Bar(msg, max=len(data))

    for index, raw_line in enumerate(data):
        progress.next()
        pose = posedict(raw_line)  # Token -> List
        if pose['label'] != 1:
            # Pose -> Decoys
            decoys.append(pose)
            continue
        # Pose -> Actives, grouped by ligand
        ligand = pose['ligand']
        pose['id'] = ligand + '-' + str(index)
        if ligand not in actives:
            actives[ligand] = []
        actives[ligand].append(pose)
    progress.finish()
    print("")

    return actives, decoys
Ejemplo n.º 9
0
def saveAverageImage(kitti_base, pos_labels, shape, fname, avg_num=None):
    """Average edge images of up to ``avg_num`` labelled samples into ``fname``.

    Each label is cropped with ``getCroppedSampleFromLabel``, resized with
    ``resizeSample``, passed through ``auto_canny``, converted to float32
    and added to the running image with weight ``1 / avg_num``. The result
    is written with ``cv2.imwrite``.

    :param kitti_base: forwarded to ``getCroppedSampleFromLabel``.
    :param pos_labels: sized sequence of labels to average.
    :param shape: shape of the accumulator and resize target.
    :param fname: output image path (also shown in the progress bar).
    :param avg_num: maximum number of samples to use; ``None`` means all.
    """
    num_images = float(len(pos_labels))
    # Resolve the default BEFORE calling min(): min(None, number) raises
    # TypeError on Python 3. Same order as average_image().
    if avg_num is None:
        avg_num = num_images
    else:
        avg_num = min(avg_num, num_images)

    # avg_img = np.zeros((shape[0],shape[1],3), np.float32)
    avg_img = np.zeros(shape, np.float32)
    progressbar = ProgressBar('Averaging ' + fname, max=len(pos_labels))
    num = 0
    for label in pos_labels:
        if num >= avg_num:
            break
        num += 1
        progressbar.next()
        sample = getCroppedSampleFromLabel(kitti_base, label)

        resized = resizeSample(sample, shape, label)
        resized = auto_canny(resized)
        resized = np.float32(resized)

        avg_img = cv2.add(avg_img, resized / float(avg_num))
    progressbar.finish()

    cv2.imwrite(fname, avg_img)
Ejemplo n.º 10
0
    def handle(self, *args, **options):
        """Rebuild the ``Section`` rows of every article whose source is "BHC".

        For each article the existing sections are deleted, then the content
        after the optional ``class="summary"`` element is cut at every
        ``<h2>`` heading. Text before the first heading is saved under the
        title "Introduction"; each later piece is saved under the text of
        the heading that precedes it. Sections are numbered from 0.
        """
        re_summary = re.compile(r"<(\w*) class=\"?summary\"?>.*?</\1>", flags=re.I | re.S)
        re_section = re.compile(r"\<h2>(.*?)(</h2>|<br>)", flags=re.I | re.S)
        bar = Bar(width=20, suffix="%(percent)d%% %(index)d/%(max)d %(elapsed_td)s ETA %(eta_td)s")

        def add_section(article, number, title, content):
            # Persist one stripped chunk of the article as a Section.
            section = Section(section_no=number, title=title)
            section.article = article
            section.content = content.strip()
            section.save()

        qs = Article.objects.filter(source="BHC")
        for a in bar.iter(qs):
            a.section_set.all().delete()
            s = a.content
            n = len(s)
            summary_match = re_summary.search(s)
            start = summary_match.end() + 1 if summary_match else 0
            section_no = 0
            section_name = "Introduction"

            for h2 in re_section.finditer(s):
                if h2.start() >= start:
                    # The chunk ends one character before the heading.
                    # Clamp at ``start``: with a heading at offset 0 the old
                    # slice became s[0:-1] and swallowed the whole article.
                    end = max(start, h2.start() - 1)
                    add_section(a, section_no, section_name, s[start:end])
                    section_no += 1
                section_name = h2.group(1)
                start = h2.end()

            # trailing text after the last heading (or the whole body when
            # there is no heading at all)
            if start < n:
                add_section(a, section_no, section_name, s[start:n])
Ejemplo n.º 11
0
def main():
    """Look up every distinct source IP of a PCAP file and print a report.

    The file name is read interactively (Python 2 ``raw_input``). Each
    address is passed to ``get_data``; afterwards the module-level ``table``
    is printed as a grid, followed by the module-level ``exceptions`` if
    there are any. Exits via ``sys.exit()`` when the file does not exist.
    """
    infile = raw_input('Input file name: ')
    if not os.path.exists(infile):
        print('[!] Cannot find file "%s"\n\tExiting...' % infile)
        sys.exit()

    print('\n[!] Loading PCAP file. Please wait, it might take a while...')
    ips = sorted(set(p[IP].src for p in PcapReader(infile) if IP in p))

    total = len(ips)
    print('[!] Total number of IP addresses: %d\n' % total)

    bar = Bar('Processing', max=total)
    for ip in ips:
        get_data(ip)
        bar.next()
    bar.finish()

    headers = ['IP', 'OWNER','COUNTRY', 'ORGANIZATION','SERVER','DESCRIPTION']
    print('\n\n')
    print(tabulate(table,headers,tablefmt='grid'))
    if exceptions:
        print('\nExceptions:')
        for e in exceptions:
            print('*\t%s' % e)
    # Completion message is printed on every successful run; it used to be
    # nested under ``if exceptions:`` and was skipped on clean runs.
    print('\n\n[!] Done.\n\n')
Ejemplo n.º 12
0
def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    This function reads a file with tweet IDs and then loads them
    through the API into the database. Prepare to wait quite a bit,
    depending on the size of the dataset.

    IDs already present in ``database.Tweet`` are not fetched again.
    Blank lines in the ID file are ignored.

    :param idlist_file: text file with one integer tweet ID per line.
    """
    ids_to_fetch = set()
    # The context manager closes the ID file once it has been read.
    with open(idlist_file, "r") as id_file:
        for line in id_file:
            # Remove newline character through .strip()
            line = line.strip()
            if not line:
                # a trailing or stray empty line is not an ID
                continue
            # Convert to int since that's what the database uses
            ids_to_fetch.add(int(line))
    # Find a list of Tweets that we already have
    ids_in_db = set(t.id for t in database.Tweet.select(database.Tweet.id))
    # Sets have an efficient .difference() method that returns IDs only present
    # in the first set, but not in the second.
    ids_to_fetch = ids_to_fetch.difference(ids_in_db)
    logging.warning(
        "\nLoaded a list of {0} tweet IDs to hydrate".format(len(ids_to_fetch)))

    # Set up a progressbar
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        # advance by the whole page at once
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
def main(args):
	"""Write the reference captions of the images listed in ``args.s``.

	``args.c`` is a JSON file with ``images`` (``file_name``, ``id``) and
	``annotations`` (``image_id``, ``caption``). Captions are tokenized
	with ``nltk.word_tokenize``, re-joined with spaces and lower-cased.
	For every whitespace-separated file name in ``args.s`` one line is
	written to ``args.saveto``: all captions of that image joined by '<>'.

	:param args: object with the attributes ``c``, ``s`` and ``saveto``.
	"""
	# close the annotation file as soon as it is parsed
	with open(args.c, 'r') as f:
		d = json.load(f)

	np.random.seed(1234)

	im2id  = {}
	id2cap = {}

	print('img 2 id....')
	for im in d['images']:
		im2id[im['file_name']] = im['id']

	bar = Bar('id 2 cap...', max=len(d['annotations']))
	for ann in d['annotations']:
		cap = ' '.join(nltk.word_tokenize(ann['caption'])).lower()
		id2cap.setdefault(ann['image_id'], []).append(cap)
		bar.next()
	bar.finish()

	with open(args.s, 'r') as f:
		images = f.read().split()

	refs = ['<>'.join(id2cap[im2id[im]]) for im in images]

	with open(args.saveto, 'w') as f:
		# same bytes as the former ``print >>f``: text plus one newline
		f.write('\n'.join(refs) + '\n')
class Closest(object):
    """Row-wise callable that fills ``cols`` from records of the same restaurant.

    For a row passed to ``__call__`` the rows of ``data`` with the same
    ``restaurant_id`` and a ``date`` not later than the row's date are
    summed over ``cols``. When there is no such row, the mean over all rows
    of that restaurant is used instead. Missing values become 0. A progress
    bar is advanced on every call and finished when the object is deleted.
    """

    # Class-level defaults; __init__ replaces all three on every instance.
    data = pd.DataFrame()
    cols = []
    bar = None

    def __init__(self, df, cols, size):
        """
        :param df: frame that ``__call__`` searches; read through its
            ``restaurant_id`` and ``date`` columns and through ``cols``.
        :param cols: column labels to aggregate and write into each row.
        :param size: maximum of the progress bar (expected number of calls).
        """
        self.data = df
        self.cols = cols
        self.bar = Bar(message="Compressing Time", max=size,
                       suffix="%(percent)d%% (%(index)d/%(max)d) ETA %(eta_td)s")

    def __call__(self, row):
        """Return ``row`` with ``cols`` replaced by the aggregated values."""
        self.bar.next()
        same_restaurant = self.data.restaurant_id == row.restaurant_id
        found = self.data[same_restaurant & (self.data.date <= row.date)]
        if found.shape[0] == 0:
            # FIXME Do something smarter than averaging?
            found = self.data[same_restaurant][self.cols].mean()
        else:
            found = found[self.cols].sum()
        # FIXME Sometimes NaNs appear if I am missing the restaurant ID.  What to do?
        found.fillna(0, inplace=True)
        row[self.cols] = found
        return row

    def __del__(self):
        # ``bar`` is still the class default (None) when __init__ never ran
        # or failed before creating it; finishing it would raise.
        if self.bar is not None:
            self.bar.finish()
Ejemplo n.º 15
0
def average_image(pos_region_generator, shape, avg_num=None):
    """Return the average edge image of up to ``avg_num`` regions.

    Every region is loaded through ``load_cropped_resized_sample`` with the
    window ``(shape[1], shape[0])``, passed through ``auto_canny``, cast to
    float32 and accumulated with weight ``1 / avg_num``.

    :param pos_region_generator: iterable of regions; consumed completely.
    :param shape: shape of the returned float32 image.
    :param avg_num: maximum number of regions to use; ``None`` means all.
    :return: the accumulated float32 image.
    """
    regions = list(pos_region_generator)
    region_count = float(len(regions))
    avg_num = region_count if avg_num is None else min(avg_num, region_count)

    window_dims = (shape[1], shape[0])

    accumulator = np.zeros(shape, np.float32)
    progressbar = ProgressBar('Averaging ', max=avg_num)
    for used, region in enumerate(regions):
        # stop once avg_num regions have been accumulated
        if used >= avg_num:
            break
        progressbar.next()

        sample = region.load_cropped_resized_sample(window_dims)
        edges = np.float32(auto_canny(sample))
        accumulator = cv2.add(accumulator, edges / float(avg_num))
    progressbar.finish()

    return accumulator
Ejemplo n.º 16
0
def main(argv):
	args = argparser.parse_args()

	print >> sys.stderr, '# Start: Matching: %s' % (datetime.datetime.now().time().isoformat())

	masterbrain = read(args.masterbrain)
	keywords = read(args.keywords)

	bar = Bar('Processing', max=len(masterbrain), suffix ='%(percent).1f%% - %(eta)ds')

	regex = {}
	for keyword in keywords:
		regex[keyword] = re.compile(r'\b({0})\b'.format(keyword))

	matches = 0
	for string in masterbrain:
		for keyword in keywords:
			if regex[keyword].search(string):
				matches = matches + 1
				print 1, "\t", string, "\t", keyword
				break
		else:
			print 0, "\t", string
		bar.next()

	bar.finish()

	print matches, "/", len(masterbrain)

	print >> sys.stderr, '# End: Matching: %s' % (datetime.datetime.now().time().isoformat())
Ejemplo n.º 17
0
        def _rforest_plot(self, pen_params):
            """Plot the mean k-fold result over an n_estimators x max_depth grid.

            ``self.k_fold_results`` is evaluated once per combination and its
            ``mean()`` is drawn as a filled contour plot, saved to
            'test_error_rforest.png'.

            :param pen_params: mapping with the sequences ``'n_estimators'``
                (x axis) and ``'max_depth'`` (y axis).
            """
            bar = Bar(
                width=40,
                suffix='%(percent)d%%'
            )

            estimators = pen_params['n_estimators']
            depths = pen_params['max_depth']
            X, Y = np.meshgrid(estimators, depths)
            print('Getting errors for {}...'.format(self.method))

            # meshgrid (default 'xy' indexing) yields arrays of shape
            # (len(depths), len(estimators)), so Z must be filled one
            # max_depth row at a time. The former x-major fill with shape
            # (len(X), len(Y)) transposed the surface and failed for
            # non-square grids.
            grid = [(x, y) for y in depths for x in estimators]
            Z = np.array([
                self.k_fold_results(**{
                    'n_estimators': x,
                    'max_depth': y
                }).mean()
                # one bar over all combinations instead of a restart per x
                for x, y in bar.iter(grid)
            ])
            Z.shape = X.shape

            fig, ax = plt.subplots()

            p = ax.contourf(X, Y, Z,
                            cmap='RdYlBu')
            ax.set_xlabel('n_estimators')
            ax.set_ylabel('max_depth')
            ax.set_title('rforest test error rate')
            plt.colorbar(p)
            plt.savefig('test_error_rforest.png')
Ejemplo n.º 18
0
    def handle(self, *args, **options):
        """Convert a GeoJSON document into a timestamped CSV of sucursales.

        The single positional argument is a URL (anything starting with
        'http', fetched with ``urllib2``) or a local path. The parsed JSON
        is stored in ``self.data``; every item yielded by ``self.entrada()``
        is converted with ``self.parse_sucursal`` and written as one row to
        ``<DATASETS_ROOT>/sucursales/<self.FILENAME % timestamp>``.

        :raises CommandError: unless exactly one argument is given.
        """
        if len(args) != 1:
            raise CommandError(
                'dame el geojson, pa'
            )

        geojson = args[0]
        if geojson.startswith('http'):
            fh = urllib2.urlopen(geojson)
        else:
            fh = open(geojson)
        try:
            self.data = json.load(fh)
        finally:
            # release the file / HTTP response once it has been parsed
            fh.close()

        suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
        if not os.path.exists(suc_dir):
            os.makedirs(suc_dir)

        FILENAME = self.FILENAME % datetime.now().strftime("%Y-%m-%d-%H%M%S")
        FILENAME = os.path.join(suc_dir, FILENAME)
        # The context manager flushes and closes the CSV on every path.
        with open(FILENAME, 'wb') as csv_file:
            writer = unicodecsv.DictWriter(csv_file,
                                           fieldnames=self.get_columnas())
            writer.writeheader()
            bar = Bar('Convirtiendo ', suffix='%(percent)d%%')
            for feature in bar.iter(self.entrada()):
                sucursal = self.parse_sucursal(feature)
                writer.writerow(sucursal)
Ejemplo n.º 19
0
def pipeline_pos(titles, descriptions, tags):
    """POS-tag the context (title, description sentences, tags) of each item.

    For item ``i`` the context consists of the title split on spaces, every
    sentence of the description (``sent_tokenize``) split on spaces, and the
    tag list; empty entries are left out. The context is tagged with
    ``nltk.pos_tag_sents`` and flattened into one list.

    :return: list with one flat list of tagged tokens per item.
    """
    def preprocess(inpt):
        # placeholder hook: currently returns its input unchanged
        return inpt

    bar = Bar('Extracting features...', max=len(titles))
    pos_collection = []
    for i in xrange(len(titles)):
        # title first ...
        candidates = [preprocess(titles[i].split(' '))]
        # ... then one entry per description sentence ...
        for sentence in sent_tokenize(descriptions[i]):
            candidates.append(preprocess(sentence.split(' ')))
        # ... and finally the tags
        candidates.append(preprocess(tags[i]))

        # only keep non-empty context entries
        context = [entry for entry in candidates if entry]

        tagged_sents = nltk.pos_tag_sents(context)
        pos_collection.append(list(itertools.chain.from_iterable(tagged_sents)))
        bar.next()
    bar.finish()

    return pos_collection
Ejemplo n.º 20
0
def main(argv):
  args = argparser.parse_args()

  print >> sys.stderr, '# Start: Keyword Data: %s, %s, %s, %s' % (args.cc, args.week, args.pages, datetime.datetime.now().time().isoformat())

  ga, gsc = initialize_service(argv, "analytics"), initialize_service(argv, "webmasters")

  print '"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"\t"%s"' % ("cc", "website", "url", "date", "keyword", "impressions", "clicks", "ctr", "position", "sessions (week)")
  
  bar = Bar('Processing', max=args.pages, suffix ='%(percent).1f%% - %(eta)ds')
  
  for website in GA_IDS[args.cc]:

    urls = get_top_landing_pages(ga, args.cc, website, args.week, args.pages)
    for row in urls:

      data = []

      # we switched from http to https between week 3 and 4
      if (args.week <= 4 and args.cc != 'VN') or website != "IPRICE":
        data.extend(get_keyword_data(gsc, args.cc, website, args.week, row[0][1:], "http"))
      if (args.week >=3 or args.cc == 'VN') and website == "IPRICE":
        data.extend(get_keyword_data(gsc, args.cc, website, args.week, row[0][1:], "https"))

      output(args.cc, website, row[0], row[1], data)

      bar.next()
    bar.finish()
      
  print >> sys.stderr, '# End: Keyword Data: %s, %s, %s, %s' % (args.cc, args.week, args.pages, datetime.datetime.now().time().isoformat())
Ejemplo n.º 21
0
 def getUsers(hubname):
     """Download the subscriber user names of a hub into ``data/hubs/<hubname>``.

     Progress and errors are appended to ``HubAnalyzer.logfile``. Pages
     ``<hub url>/subscribers/rating/page1`` .. ``page<last>`` are requested
     and the text of every ``username`` element inside a ``user `` element
     is written to the output file, one name per line.

     :param hubname: hub identifier, resolved with ``HubAnalyzer.hubname2link``.
     :return: ``None``; returns early when the output file already exists
         and ``HubAnalyzer.enforce_download_in_presence_of_data`` is false.
     :raises Exception: when the last page number cannot be determined or a
         page request fails; the partial output file is removed first.
     """
     output_filename = "data/hubs/" + hubname
     # The context manager closes the log on every exit path, including the
     # early return and the error exits.
     with open(HubAnalyzer.logfile, "a") as log:
         print("hub: " + hubname + " ----------------- ", file=log)
         print(time.strftime("%H:%M:%S"), file=log)
         url = HubAnalyzer.hubname2link(hubname)
         # if data is here, do nothing
         if os.path.isfile(output_filename) and not HubAnalyzer.enforce_download_in_presence_of_data:
             print("data is already here, abort this url", file=log)
             return None
         # opening in "w" mode also clears any previous content
         output_file = open(output_filename, "w")
         try:
             try:
                 last_page_num = int(HubAnalyzer.getLastPageNumber(url))
             except Exception:
                 print("URL is broken, abort the url", file=log)
                 log.flush()
                 output_file.close()
                 os.remove(output_filename)
                 raise Exception("Cannot analyze the page, please, check the url below: \n" + url)
             # get connection to habrahabr-hub
             userlist_url = url + "/subscribers/rating/page"
             http = urllib3.PoolManager()
             bar = None
             if HubAnalyzer.report_downloading_progress:
                 HubAnalyzer.get_hub_description(hubname)
                 bar = Bar("Downloading: " + hubname, max=last_page_num, suffix="%(percent)d%%")
             for i in range(1, last_page_num + 1):
                 user_page = userlist_url + str(i)
                 print(user_page, file=log)
                 log.flush()
                 try:
                     response = http.request("GET", user_page)
                 except urllib3.exceptions.HTTPError as err:
                     # urllib3 errors do not define ``code``; read it
                     # defensively instead of risking an AttributeError.
                     # NOTE(review): a 404 response probably does not raise
                     # here at all -- consider checking response.status.
                     not_found = getattr(err, "code", None) == 404
                     if not_found:
                         print(user_page + " !! 404 !!", file=log)
                     else:
                         print(user_page + " PARSING ERROR ", file=log)
                     log.flush()
                     output_file.close()
                     os.remove(output_filename)
                     if not_found:
                         # was ``raise ("...")``, i.e. raising a plain str
                         raise Exception("Hub is not found, please, check the url")
                     raise Exception("Error: cannot parse the page!")
                 soup = BeautifulSoup(response.data)
                 for userRow in soup.find_all(class_="user "):
                     print(userRow.find(class_="username").text, file=output_file)
                 output_file.flush()
                 if bar is not None:
                     bar.next()
             # finalize and close everything
             if bar is not None:
                 bar.finish()
         finally:
             # closing an already closed file is a no-op
             output_file.close()
Ejemplo n.º 22
0
 def make_me_a_rockstar(self):
     """Initialise the repository and create one commit per generated date.

     The repository at ``self.repo_path`` is initialised, every date from
     ``self._get_dates_list()`` gets a commit whose message is a fresh
     UUID1 string, and ``self._make_last_commit()`` closes the run.
     """
     self.repo = git.Repo.init(self.repo_path)
     bar = Bar('Making you a Rockstar Programmer', suffix='%(percent)d%%')
     commit_dates = bar.iter(self._get_dates_list())
     for commit_date in commit_dates:
         message = str(uuid.uuid1())
         self._edit_and_commit(message, commit_date)
     self._make_last_commit()
     print('\nYou are now a Rockstar Programmer!')
Ejemplo n.º 23
0
    def evolve(self, population, cxpb, mutpb, mutfq, ngen, goal):
        """Evolve ``population`` for up to ``ngen`` generations.

        Each generation keeps the current best individual, fills the rest
        by tournament selection (tournament size 10), clones everything,
        applies crossover and mutation, and re-evaluates exactly the
        individuals whose fitness was invalidated. Evolution stops early
        once the best fitness exceeds ``goal``.

        :param population: list of individuals with a ``fitness`` attribute.
        :param cxpb: crossover probability (also forwarded to ``self.crossover``).
        :param mutpb: probability that an individual is mutated.
        :param mutfq: forwarded to ``self.mutate``.
        :param ngen: maximum number of generations.
        :param goal: fitness value above which evolution stops.
        :return: the fittest individual of the last generation produced.
        """
        # Cheapest classifier.
        clf = LinearRegression(normalize=True)

        # Evaluate and assign fitnesses of the starting population.
        fitness_list = [self.evaluate(ind, clf) for ind in population]
        for individual, fitness in zip(population, fitness_list):
            individual.fitness.values = fitness

        best = max(population, key=lambda x: x.fitness.values[0])

        # So that we know things are happening.
        bar = Bar('Evolving', max=ngen)

        # Evolution!
        for gen in range(ngen):

            if best.fitness.values[0] > goal:
                break

            # Select the next generation of individuals (elitism: the best
            # individual always survives).
            offspring = [best]
            offspring.extend(tools.selTournament(population, len(population)-1, 10))
            offspring = [self.toolbox.clone(ind) for ind in offspring]

            # Apply crossovers.
            for child_a, child_b in zip(offspring[::2], offspring[1::2]):  # Staggered.
                if random.random() < cxpb:
                    self.crossover(child_a, child_b, cxpb)
                    del child_a.fitness.values
                    del child_b.fitness.values

            # Apply mutations.
            for child in offspring:
                if random.random() < mutpb:
                    self.mutate(child, mutfq)
                    del child.fitness.values

            # Reevaluate fitness of changed individuals. The children
            # themselves are evaluated; previously the OLD population was
            # evaluated and its scores were zipped onto the children.
            new_children = [e for e in offspring if not e.fitness.valid]
            fitness_list = [self.evaluate(child, clf) for child in new_children]
            for individual, fitness in zip(new_children, fitness_list):
                individual.fitness.values = fitness

            # Replace old population with new generation, then pick the
            # best of the NEW generation (it used to be taken from the
            # population that was about to be discarded).
            population = offspring
            best = max(population, key=lambda x: x.fitness.values[0])

            # Progress!
            bar.next()

        # Done! Return the most fit evolved individual.
        bar.finish()
        return best
def do_epoch(mode, epoch, skipped=0):
    """Run one epoch through ``dmn`` and report loss, confusion matrix and accuracy.

    In 'test' mode at most 1000 batches are processed. Steps that report a
    non-zero ``skipped`` count contribute neither to the loss nor to the
    predictions. A NaN loss terminates the process. When the accuracy count
    is lower than the previous entry of the module-level ``accuracies``,
    ``dmn.lr`` is multiplied by ``args.learning_rate_decay``.

    :param mode: 'train' or 'test'
    :param epoch: epoch number (not used in the computation)
    :param skipped: running total of skipped items carried in from earlier epochs
    :return: ``(avg_loss, skipped)``
    """
    y_true = []
    y_pred = []
    avg_loss = 0.0
    prev_time = time.time()

    batches_per_epoch = dmn.get_batches_per_epoch(mode)
    if mode=="test":
        batches_per_epoch=min(1000,batches_per_epoch)

    bar = Bar('processing', max=batches_per_epoch)
    for i in range(batches_per_epoch):
        step_data = dmn.step(i, mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        current_skip = (step_data["skipped"] if "skipped" in step_data else 0)
        log = step_data["log"]

        skipped += current_skip

        if current_skip == 0:
            avg_loss += current_loss
            y_true.extend(answers)
            y_pred.extend(prediction.argmax(axis=1))

            # TODO: save the state sometimes
            if (i % args.log_every == 0):
                # periodic logging is disabled; only the timestamp is refreshed
                cur_time = time.time()
                prev_time = cur_time

        if np.isnan(current_loss):
            print("==> current loss IS NaN. This should never happen :) ")
            exit()
        bar.next()
    bar.finish()

    avg_loss /= batches_per_epoch
    print("\n  %s loss = %.5f" % (mode, avg_loss))
    print("confusion matrix:")
    print(metrics.confusion_matrix(y_true, y_pred))

    # number of correct predictions (a count, not a ratio)
    accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)])
    print("accuracy: %.2f percent" % (accuracy * 100.0 / batches_per_epoch / args.batch_size))

    if len(accuracies)>0 and accuracies[-1]>accuracy:
        dmn.lr=dmn.lr*args.learning_rate_decay
    accuracies.append(accuracy)
    return avg_loss, skipped
Ejemplo n.º 25
0
def main():
    """Normalize the CSV named by ``sys.argv[1]`` into 'productos.csv'.

    The dialect is sniffed from the module-level ``EJEMPLO`` sample. Each
    input row is zipped with the module-level ``headers``, passed through
    ``normalizar`` and written with the ``PRODUCTO_COLS`` field names.
    """
    dialect = csv.Sniffer().sniff(EJEMPLO)
    # Both files are closed (and the output flushed) when the block ends;
    # before, they were left to the garbage collector.
    with open(sys.argv[1]) as source, open('productos.csv', 'w') as target:
        reader = csv.reader(source, dialect=dialect)
        writer = csv.DictWriter(target, fieldnames=PRODUCTO_COLS)
        writer.writeheader()
        bar = Bar('Normalizando CSV', suffix='%(percent)d%%')
        for l in bar.iter(reader):
            data = normalizar(dict(zip(headers, l)))
            writer.writerow(data)
Ejemplo n.º 26
0
 def resample(self, rm, num_points):
     """Insert the start point and ``num_points`` random points into ``rm``.

     :param rm: roadmap object providing ``insert``.
     :param num_points: number of random points to draw inside
         ``self.width`` x ``self.height``.
     :return: the same ``rm`` object.
     """
     rm.insert(point.make(self.start.x, self.start.y))
     progress = Bar("Generating Roadmap", max=num_points)
     for _ in xrange(num_points):
         rm.insert(point.get_random_point(self.width, self.height))
         progress.next()
     progress.finish()
     return rm
Ejemplo n.º 27
0
def save_regions(reg_gen, num_regions, window_dims, save_dir):
    """Write the first ``num_regions`` regions of ``reg_gen`` as PNG files.

    Files are named ``000000.png``, ``000001.png``, ... inside ``save_dir``;
    each holds the region's cropped sample resized to ``window_dims``.
    """
    progressbar = ProgressBar('Saving regions', max=num_regions)
    selected = itertools.islice(reg_gen, 0, num_regions)
    for index, img_region in enumerate(selected):
        fname = os.path.join(save_dir, '{:06d}.png'.format(index))
        cv2.imwrite(fname, img_region.load_cropped_resized_sample(window_dims))
        progressbar.next()
    progressbar.finish()
Ejemplo n.º 28
0
 def get_stale_files(self, media_files):
     """Return the media files for which the database check reports falsy.

     Each file is passed to ``self.remove_file_if_not_exists_in_db`` together
     with the models from ``self.get_django_models_with_file_fields()``;
     files for which that call returns a falsy value are collected.

     :param media_files: sized iterable of media files.
     :return: list of the collected files, in input order.
     """
     models = self.get_django_models_with_file_fields()
     progress = Bar('Analyzing media files', max=len(media_files))
     collected = []
     for candidate in media_files:
         handled = self.remove_file_if_not_exists_in_db(candidate, models)
         if not handled:
             collected.append(candidate)
         progress.next()
     progress.finish()
     return collected
Ejemplo n.º 29
0
def update_api(contract_paths, old_api):
    """Build a fresh API description from every contract.

    ``update_contract_api`` is called per contract; non-empty event
    descriptions are merged into ``"events"`` and the function description
    is stored under the contract's name in ``"functions"``.

    :param contract_paths: mapping of contract name -> contract path.
    :param old_api: previous API description, forwarded unchanged.
    :return: dict with the keys ``"events"`` and ``"functions"``.
    """
    progress = Bar("Contracts", max=len(contract_paths))
    events = {}
    functions = {}
    for name, path in contract_paths.items():
        contract_events, contract_functions = update_contract_api(name, path, old_api)
        if contract_events:
            events.update(contract_events)
        functions[name] = contract_functions
        progress.next()
    progress.finish()
    return {"events": events, "functions": functions}
Ejemplo n.º 30
0
Archivo: status.py Proyecto: noa/brute
def main():
    """Summarise the status of every job found in the workspace.

    Each ``*.params`` file in ``args.workspace`` names an experiment
    directory (the file name without extension). The status of a job is
    read from the first ``*.txt`` file found in that directory with
    ``get_job_status``. A per-status count and the total are printed; with
    ``args.verbose`` every job is listed first.
    """
    # Read arguments
    args = get_args()

    assert os.path.isdir(args.workspace), "not a directory: " + args.workspace

    # Read the configuration file
    config = get_conf(args)

    # Dictionary to store job results
    job_status = dict()

    # List the workspace ONCE so that the bar maximum and the loop below
    # always agree, even if files appear or vanish in the meantime.
    param_files = [f for f in os.listdir(args.workspace)
                   if fnmatch.fnmatch(f, '*.params')]

    bar = Bar('Processing', max=len(param_files))

    for f in param_files:
        expt_params = os.path.join(args.workspace, f)
        # result is not used here; the call is kept in case it validates
        # the parameter file
        get_param_str(expt_params)
        path = os.path.splitext(f)[0]
        job_num = get_job_num(path)
        expt_dir = os.path.join(args.workspace, path)
        for f2 in os.listdir(expt_dir):
            if fnmatch.fnmatch(f2, '*.txt'): # job log output
                log = os.path.join(expt_dir, f2)
                job_status[job_num] = get_job_status(log, config)
                break
        bar.next()

    # End the progress bar
    bar.finish()

    if args.verbose:
        for n in job_status:
            print('n='+n+' status='+str(job_status[n]))

    print('JOB STATUS')
    print('--------------------------')
    import collections
    stats = collections.defaultdict(int)
    for n in job_status:
        stats[job_status[n]] += 1
    total = 0
    for k in stats:
        total += stats[k]
        print(str(k) + ' : ' + str(stats[k]))
    print('--------------------------')
    print('TOTAL: ' + str(total))
Ejemplo n.º 31
0
def format_data(exif_array):
    """
    Build a GeoJSON FeatureCollection from per-image metadata.

    :param exif_array: list of dicts, one per image, keyed by tag name
        (e.g. 'EXIF:DateTimeOriginal', 'XMP:Latitude').  The list is sorted
        in place by 'EXIF:DateTimeOriginal', and the thumbnail / maker-note
        keys listed below are removed from each dict.
    :return: dict of type "FeatureCollection" whose features are, in order:
        the area-of-interest polygon, the flight-path LineString, one Point
        per image, and the image footprint features from ``image_poly``.
    """
    print("ExIF ARRAY", exif_array)
    exif_array.sort(key=itemgetter('EXIF:DateTimeOriginal'))
    feature_coll = dict(type="FeatureCollection", features=[])
    linecoords = []
    img_stuff = []
    first_datetime = ''
    sensor = ''
    sensor_make = ''
    bar = Bar('Creating GeoJSON', max=len(exif_array))
    for i, tags in enumerate(exif_array, start=1):
        # Drop bulky keys from this image's tags.  The previous code called
        # exif_array.remove(tag), i.e. tried to remove a key *string* from
        # the list of dicts being iterated, which raises ValueError.
        for unwanted in ('JPEGThumbnail', 'TIFFThumbnail', 'Filename',
                         'EXIF MakerNote'):
            tags.pop(unwanted, None)
        try:
            lat = float(tags['XMP:Latitude'])
            lon = float(tags['XMP:Longitude'])
            imgwidth = tags['EXIF:ImageWidth']
            imghite = tags['EXIF:ImageHeight']
            r_alt = float(tags['XMP:RelativeAltitude'])
            a_alt = float(tags['XMP:AbsoluteAltitude'])
        except KeyError:
            # Fall back to the alternative tag names when any of the
            # preferred ones is missing.
            lat = float(tags['Composite:GPSLatitude'])
            lon = float(tags['Composite:GPSLongitude'])
            imgwidth = tags['EXIF:ExifImageWidth']
            imghite = tags['EXIF:ExifImageHeight']
            r_alt = float(tags['XMP:Altitude'])
            a_alt = float(tags['XMP:Altitude'])
        coords = [lon, lat, r_alt]
        linecoords.append(coords)
        # NOTE(review): 'XMP:RelativeAltitude' and the Flight*/Gimbal* tags
        # are read unconditionally below, so an image that needed the
        # fallback above because those tags are absent still raises KeyError
        # here -- confirm which tag sets must be supported.
        ptProps = {
            "File_Name": tags['File:FileName'],
            "Exposure Time": tags['EXIF:ExposureTime'],
            "Focal_Length": tags['EXIF:FocalLength'],
            "Date_Time": tags['EXIF:DateTimeOriginal'],
            "Image_Width": imgwidth,
            "Image_Height": imghite,
            "Heading": tags['XMP:FlightYawDegree'],
            "RelativeAltitude": r_alt,
            "AbsoluteAltitude": a_alt,
            "Relative_Altitude": tags['XMP:RelativeAltitude'],
            "FlightRollDegree": tags['XMP:FlightRollDegree'],
            "FlightYawDegree": tags['XMP:FlightYawDegree'],
            "FlightPitchDegree": tags['XMP:FlightPitchDegree'],
            "GimbalRollDegree": tags['XMP:GimbalRollDegree'],
            "GimbalYawDegree": tags['XMP:GimbalYawDegree'],
            "GimbalPitchDegree": tags['XMP:GimbalPitchDegree'],
            "EXIF:DateTimeOriginal": tags['EXIF:DateTimeOriginal']
        }
        if i == 1:
            # Mission-level properties come from the earliest image.
            first_datetime = tags['EXIF:DateTimeOriginal']
            sensor = tags['EXIF:Model']
            sensor_make = tags['EXIF:Make']
        img_stuff.append(dict(coords=coords, props=ptProps))
        ptGeom = dict(type="Point", coordinates=coords)
        feature_coll['features'].append(
            dict(type="Feature", geometry=ptGeom, properties=ptProps))
        bar.next()
    # Finish this bar before image_poly / create_georaster draw their own.
    bar.finish()

    tiles, aoi = image_poly(img_stuff)
    if geo_tiff == 'y':
        create_georaster(tiles)
    else:
        print("no georasters.")
    lineGeom = dict(type="LineString", coordinates=linecoords)
    lines = dict(type="Feature", geometry=lineGeom, properties={})
    feature_coll['features'].insert(0, lines)
    mission_props = dict(date=first_datetime,
                         platform="DJI Mavic 2 Pro",
                         sensor_make=sensor_make,
                         sensor=sensor)
    polys = dict(type="Feature", geometry=aoi, properties=mission_props)
    feature_coll['features'].insert(0, polys)
    feature_coll['features'].extend(tiles)
    return feature_coll
Ejemplo n.º 32
0
def create_georaster(tags):
    """
    Write one georeferenced GeoTIFF per image footprint feature.

    :param tags: sized iterable of feature dicts.  Each needs
        tag['geometry']['coordinates'][0] holding at least four [x, y, ...]
        vertices and tag['properties']['File_Name'] naming a file under
        ``indir + "/images/"``.
    :return: None; the GeoTIFFs are written to ``indir + "/output"``.
    """
    out_out = ntpath.dirname(indir + "/output/")
    print("out dir", out_out)
    if not os.path.exists(out_out):
        os.makedirs(out_out)
    bar = Bar('Creating GeoTIFFs', max=len(tags))

    # The output spatial reference is the same for every image.
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)
    wkt = srs.ExportToWkt()

    for tag in tags:
        ring = tag['geometry']['coordinates'][0]
        pt0, pt1, pt2, pt3 = [(ring[k][0], ring[k][1]) for k in range(4)]

        props = tag['properties']
        file_in = indir + "/images/" + props['File_Name']
        # Swap the extension properly: slicing off three characters broke
        # names such as "photo.jpeg" (-> "photo.jtif").
        stem = os.path.splitext(ntpath.basename(file_in))[0]
        dst_filename = out_out + "/" + stem + '.tif'

        src_ds = gdal.Open(file_in, 0)
        gt = src_ds.GetGeoTransform()
        ext = GetExtent(gt, src_ds.RasterXSize, src_ds.RasterYSize)

        # Tie each raster corner (pixel, line) to a footprint vertex (x, y),
        # with the same pairing as before: ext0-pt2, ext1-pt3, ext2-pt0,
        # ext3-pt1.  The GCPs are built directly rather than being formatted
        # into a "-gcp ..." string and parsed back.
        pairs = ((ext[0], pt2), (ext[1], pt3), (ext[2], pt0), (ext[3], pt1))
        gcp_list = []
        for corner, (x, y) in pairs:
            gcp_list.append(
                gdal.GCP(float(x), float(y), 0,
                         float(corner[0]), float(corner[1])))

        out_ds = gdal.Translate(dst_filename,
                                src_ds,
                                outputSRS=wkt,
                                GCPs=gcp_list,
                                noData=0)
        # Drop both references so the datasets are released (as the original
        # did by rebinding and then clearing its single `ds` variable).
        out_ds = None
        src_ds = None
        bar.next()
    bar.finish()
Ejemplo n.º 33
0
def image_poly(imgar):
    """
    Compute a footprint feature for every image and one merged outline.

    :param imgar: sized iterable of dicts with 'coords' ([lng, lat, ...])
        and 'props' (the per-image property dict).
    :return: tuple ``(features, outline)``: the list of per-image "Feature"
        dicts and the merged, simplified outline geometry as a dict.
    """
    features = []
    projected_shapes = []
    bar = Bar('Plotting Image Bounds', max=len(imgar))
    for image in imgar:
        position = image['coords']
        lat = float(position[1])
        lng = float(position[0])
        print("**Drones Lng, Lats**", lng, lat)

        props = image['props']
        flight_roll, flight_pitch, flight_yaw = (
            float(props[key]) for key in ('FlightRollDegree',
                                          'FlightPitchDegree',
                                          'FlightYawDegree'))
        gimbal_roll, gimbal_pitch, gimbal_yaw = (
            float(props[key]) for key in ('GimbalRollDegree',
                                          'GimbalPitchDegree',
                                          'GimbalYawDegree'))
        width = props['Image_Width']
        height = props['Image_Height']
        print("**Gimbal Pitch**", gimbal_pitch, "\n**Gimbal Roll**",
              gimbal_roll, "\n**Gimbal Yaw**", gimbal_yaw)

        file_name = props['File_Name']
        print("**file name**", file_name)
        focal_length = props['Focal_Length']
        relative_alt = float(props["RelativeAltitude"])
        # Not used below, but still converted so a missing or non-numeric
        # value raises at the same point as before.
        float(props["AbsoluteAltitude"])

        utm_position = utm.from_latlon(lat, lng)
        poly = new_gross(width, height, utm_position, relative_alt,
                         focal_length, gimbal_roll, 90 + gimbal_pitch,
                         gimbal_yaw, flight_roll, flight_pitch, flight_yaw)

        # p2 is formatted into 'epsg:%s' here and again after the loop
        # (where the value from the last image is the one used).
        p2 = convert_wgs_to_utm(lng, lat)
        source_crs = pyproj.Proj(init='epsg:4326')
        target_crs = pyproj.Proj(init='epsg:%s' % p2)
        to_target = partial(pyproj.transform, source_crs, target_crs)
        projected_shapes.append(transform(to_target, poly))

        # Create GeoJSON
        geometry = rewind(json.loads(geojson.dumps(poly)))
        features.append(
            dict(type="Feature", geometry=geometry, properties=props))
        bar.next()

    buffered = [shape.buffer(0.001) for shape in projected_shapes]
    merged = cascaded_union(buffered)
    simplified = merged.simplify(0.005, preserve_topology=False)
    utm_crs = pyproj.Proj(init='epsg:%s' % p2)
    wgs_crs = pyproj.Proj(init='epsg:4326')
    to_wgs = partial(pyproj.transform, utm_crs, wgs_crs)
    outline = rewind(json.loads(geojson.dumps(transform(to_wgs, simplified))))
    bar.finish()
    return features, outline
Ejemplo n.º 34
0
    def Commentaries(self, codeArg, outputArg):
        """
        Remove comments and triple-quoted comment blocks from source files.

        Every file matching ``<outputArg><sep>**<sep>*.<ext>`` (recursive
        glob, separator from ``self.utils.Platform()``) is rewritten in place
        through ``fileinput`` unless its path contains ``blockDir``.  The
        files are then read again to count lines that still look like
        comments, and ``Remove.Backslashes`` is run.

        :param codeArg: language selector; only "python" defines the file
            extension, the excluded directory and the patterns used below.
        :param outputArg: root directory that is searched for files.
        :return: EXIT_SUCCESS when ``Remove.Backslashes`` returns 0 and the
            verification count is 0, otherwise EXIT_FAILURE.

        NOTE(review): for any ``codeArg`` other than "python" the names
        ``detectFiles``, ``blockDir`` and the patterns are never assigned, so
        the glob below raises NameError.
        """
        countLineOutput = 0
        countLineInput = 0
        noCommentary = 0
        isCommentary = 0
        countRecursFiles = 0

        if codeArg == "python":
            detectFiles = "py"
            blockDir = "__pycache__"

            # Line that starts with '#'
            commentariesBeginLine = r"^\#.*"
            # Line holding only a triple quote (optionally indented): """ or '''
            quoteOfCommentariesMultipleLines = r"^\s*[\"|\']{3}$"
            # '=' followed by a raw-string opener (r" or r'); used with
            # re.match, so it only fires when the line starts with it
            quoteInRegex = r"\={1}\s*r[\"|\']{1}"
            # Line starting with a triple quote, optionally followed by ')'
            # and '.', e.g. a closing quote followed by .format()
            quoteOfEndCommentariesMultipleLines = r"^\s*[\"|\']{3}\)?\.?"
            # Triple quote ... triple quote ending the line (""" comment """)
            quoteOfCommentariesOneLine = r"[\"|\']{3}.*[\"|\']{3}$"
            # Triple quote that follows '=' (directly or through a call or
            # attribute), i.e. a triple-quoted string bound to a variable
            quoteIntoVariable = r".*\={1}\s*\w*\.?\w*[\(|\.]{1}[\"|\']{3}|.*\={1}\s*[\"|\']{3}"
            # '#' after code on the same line, unless the character right
            # after '#' is one of the excluded quote/punctuation characters
            commentariesAfterLine = r"\s*\#[^\"|^\'|^\.|^\?|^\*|^\!|^\]|^\[|^\\|^\)|^\(|^\{|^\}].*"

        recursFiles = [
            f for f in glob.glob("{0}{1}**{1}*.{2}".format(
                outputArg, self.utils.Platform(), detectFiles),
                                 recursive=True)
        ]

        # -- Remove commentaries and Count commentaries will be removed -- #
        # Counts every globbed file, including those skipped via blockDir.
        for number in recursFiles:
            countRecursFiles += 1

        print("\n[+] Running remove commentaries in {0} file(s)...\n".format(
            countRecursFiles))

        with Bar(PROGRESS_COLOUR + 'Processing', max=countRecursFiles) as bar:
            for file in recursFiles:
                if blockDir in file:
                    # Skipped files do not advance the progress bar.
                    continue
                else:
                    # -- Remove commentaries -- #
                    # With inplace=True, print() writes into the file being
                    # rewritten: a line that is not printed is dropped.
                    # eachLine keeps its own newline and print() adds one
                    # more (presumably tidied up by Remove.Backslashes --
                    # TODO confirm).
                    with fileinput.input(file, inplace=True) as inputFile:
                        for eachLine in inputFile:
                            searchCommentariesAfterLine = re.search(
                                commentariesAfterLine, eachLine)
                            searchCommentariesBeginLine = re.search(
                                commentariesBeginLine, eachLine)
                            if codeArg == "python":
                                # Lines containing "coding" or "#!" are
                                # written back untouched.
                                if "coding" in eachLine or "#!" in eachLine:
                                    print(eachLine)
                                    continue

                                # Update the two counters that track
                                # triple-quoted regions.
                                # NOTE(review): the first branch continues
                                # without printing, so a matching line is
                                # dropped from the file.
                                if re.match(quoteInRegex, eachLine):
                                    continue
                                elif re.match(quoteIntoVariable, eachLine):
                                    noCommentary += 1
                                elif re.match(
                                        quoteOfCommentariesMultipleLines,
                                        eachLine
                                ) or re.match(
                                        quoteOfEndCommentariesMultipleLines,
                                        eachLine):
                                    isCommentary += 1
                                else:
                                    pass

                                # Decide the fate of the line from the
                                # counters: branches that print keep the
                                # line, the others drop it and count it in
                                # countLineInput.
                                if re.match(quoteOfCommentariesOneLine,
                                            eachLine):
                                    countLineInput += 1
                                    isCommentary = 0
                                    continue
                                elif isCommentary == 1 and noCommentary == 0:
                                    # one bare triple quote seen so far
                                    countLineInput += 1
                                    continue
                                elif isCommentary == 0 and noCommentary == 1:
                                    # triple-quoted variable value seen
                                    print(eachLine)
                                    continue
                                elif isCommentary == 2:
                                    # second bare triple quote: reset
                                    countLineInput += 1
                                    isCommentary = 0
                                    continue
                                elif isCommentary == 1 and noCommentary == 1:
                                    # bare quote after a variable opener:
                                    # reset both counters, keep the line
                                    isCommentary = 0
                                    noCommentary = 0
                                    print(eachLine)
                                    continue
                                else:
                                    pass

                            # '#' comments: the matched text is cut out and
                            # the remainder of the line is written back.
                            if searchCommentariesBeginLine is not None:
                                countLineInput += 1
                                eachLine = eachLine.replace(
                                    searchCommentariesBeginLine.group(0), "")
                                print(eachLine)
                            elif searchCommentariesAfterLine is not None:
                                eachLine = eachLine.replace(
                                    searchCommentariesAfterLine.group(0), "")
                                countLineInput += 1
                                print(eachLine)
                            else:
                                print(eachLine)
                bar.next(1)
            bar.finish()

        # -- Initialize vars -- #
        isCommentary = 0
        noCommentary = 0

        # -- Check if all commentaries are removed -- #
        # Same classification as above, but read-only: lines that still look
        # like comments are counted in countLineOutput.
        # NOTE(review): countLineOutput is reset to 0 at the top of every
        # iteration, so the final check only reflects the last file in
        # recursFiles (and is 0 if that file is skipped via blockDir).
        for file in recursFiles:
            countLineOutput = 0
            if blockDir in file:
                continue
            else:
                with open(file, "r") as readFile:
                    countLineOutput = 0
                    readF = readFile.readlines()
                    for eachLine in readF:
                        searchCommentariesAfterLine = re.search(
                            commentariesAfterLine, eachLine)
                        searchCommentariesBeginLine = re.search(
                            commentariesBeginLine, eachLine)
                        if codeArg == "python":
                            if "coding" in eachLine or "#!" in eachLine:
                                continue

                            if re.match(quoteInRegex, eachLine):
                                continue
                            elif re.match(quoteIntoVariable, eachLine):
                                noCommentary += 1
                            elif re.match(
                                    quoteOfCommentariesMultipleLines,
                                    eachLine) or re.match(
                                        quoteOfEndCommentariesMultipleLines,
                                        eachLine):
                                isCommentary += 1
                            else:
                                pass

                            if re.match(quoteOfCommentariesOneLine, eachLine):
                                isCommentary = 0
                                countLineOutput += 1
                                continue
                            elif isCommentary == 1 and noCommentary == 0:
                                countLineOutput += 1
                                continue
                            elif isCommentary == 0 and noCommentary == 1:
                                continue
                            elif isCommentary == 2:
                                isCommentary = 0
                                countLineOutput += 1
                                continue
                            elif isCommentary == 1 and noCommentary == 1:
                                isCommentary = 0
                                noCommentary = 0
                                continue
                            else:
                                pass

                        if searchCommentariesBeginLine is not None:
                            countLineOutput += 1
                        elif searchCommentariesAfterLine is not None:
                            countLineOutput += 1
                        else:
                            pass

        # Remove.Backslashes always runs; success additionally requires that
        # the verification pass found nothing left.
        if (Remove.Backslashes(self, codeArg, outputArg) == 0):
            if countLineOutput == 0:
                print("\n-> {0} lines of commentaries removed\n".format(
                    countLineInput))
                return EXIT_SUCCESS
            else:
                return EXIT_FAILURE
        else:
            return EXIT_FAILURE
Ejemplo n.º 35
0
    def PrintFunctions(self, codeArg, outputArg):
        """
        Remove print statements/calls from the source files under outputArg.

        Files matching ``<outputArg><sep>**<sep>*.<ext>`` (recursive glob) are
        rewritten in place unless their path contains ``blockDir``.  Any line
        whose start matches ``\\s*print`` is dropped, together with the lines
        judged to continue a multi-line print.  The files are then re-read to
        count remaining matches and ``Remove.Backslashes`` is run.

        :param codeArg: language selector; only "python" defines the file
            extension, excluded directory and patterns.
        :param outputArg: root directory that is searched for files.
        :return: EXIT_SUCCESS when ``Remove.Backslashes`` returns 0 and no
            print line remains, otherwise EXIT_FAILURE.
        """
        removedPrints = 0
        remainingPrints = 0
        openPy3Print = 0
        openPy2Print = 0

        if codeArg == "python":
            detectFiles = "py"
            blockDir = "__pycache__"

            detectPrint = r"\s*print"
            detectPythonPrint2 = r"\s*print\s*[\"|\']{1}"
            detectPythonPrint3 = r"\s*print\s*\({1}"
            detectPythonPrintMultipleLines = r"^\s+[\"\']{1}\s*\w+|^[\"\']{1}\s*\w+"

        searchPattern = "{0}{1}**{1}*.{2}".format(outputArg,
                                                  self.utils.Platform(),
                                                  detectFiles)
        recursFiles = list(glob.glob(searchPattern, recursive=True))
        countRecursFiles = len(recursFiles)

        print("\n[+] Running remove print function in {0} file(s)...\n".format(
            countRecursFiles))

        with Bar(PROGRESS_COLOUR + 'Processing', max=countRecursFiles) as bar:
            for file in recursFiles:
                if blockDir in file:
                    # skipped files do not advance the progress bar
                    continue

                # -- Remove all print functions -- #
                # With inplace=True, print() writes into the file; a line
                # that is not printed is removed.
                with fileinput.input(file, inplace=True) as inputFile:
                    for eachLine in inputFile:
                        if re.match(detectPrint, eachLine):
                            removedPrints += 1
                            if re.match(detectPythonPrint3, eachLine):
                                # print( ... without a closing parenthesis
                                if "(" in eachLine and ")" not in eachLine:
                                    openPy3Print += 1
                            elif re.match(detectPythonPrint2, eachLine):
                                # print "..." possibly continued below
                                openPy2Print += 1
                            # every print line is dropped
                            continue

                        if openPy3Print == 1:
                            # Inside an open print(...): drop the line, and
                            # close the region on a lone ')'.
                            if ")" in eachLine and "(" not in eachLine:
                                openPy3Print = 0
                            continue

                        if openPy2Print > 0:
                            if re.match(detectPythonPrintMultipleLines,
                                        eachLine):
                                openPy2Print += 1
                            else:
                                openPy2Print = 0
                                print(eachLine)
                            continue

                        print(eachLine)
                bar.next(1)
            bar.finish()

        # -- Check if all print functions are removed -- #
        for file in recursFiles:
            if blockDir in file:
                continue
            with open(file, "r") as readFile:
                remainingPrints += sum(
                    1 for eachLine in readFile.readlines()
                    if re.match(detectPrint, eachLine))

        if Remove.Backslashes(self, codeArg, outputArg) != 0:
            return EXIT_FAILURE
        if remainingPrints != 0:
            return EXIT_FAILURE
        print("\n-> {0} print functions removed\n".format(removedPrints))
        return EXIT_SUCCESS
Ejemplo n.º 36
0
def save_hdf5(
    dataset: Dataset,
    path: str,
    num_shards: Optional[int] = None,
    shard_size: Optional[int] = None,
    verbose: bool = True,
    bar: Bar = _DefaultBar,
) -> str:
    r"""Saves the contents of the dataset to one or more HDF5 files.

    .. warning::
        HDF5 support in Combustion is deprecated

    Serialization is performed as follows:
        1.  Dataset partitions are determined if required by ``num_shards`` or ``shard_size``. By default,
            only a single file containing the entire dataset will be produced.
        2.  Examples are read by iterating over the dataset and are written to disk. For multiple
            shards, a shard index is added to the filename given in ``path``.
        3.  Attributes accessible by ``vars(self)`` are attached as HDF5 attributes, allowing for loading
            of instance variables. Tensors are not saved in this way, as all attributes should be small.

    .. note::
        Serialization requires the h5py library. See http://docs.h5py.org/en/stable/index.html
        for more details.

    .. note::
        When saving multiple shards, the file created at ``path`` will be created from a
        :class:`h5py.VirtualSource`. See `Virtual Dataset <http://docs.h5py.org/en/stable/vds.html>`_
        for more details.

    Args:
        dataset (Dataset): The dataset to save.
        path (str): The filepath to save to. Ex ``foo/bar.h5``.
        num_shards (int, optional): If given, `num_shards` files will be created, each
            containing ``1 / num_shards`` of the dataset. Exclusive with ``shard_size``.
            Must be a positive int.
        shard_size (int, optional): If given, multiple files will be created such that
            each file contains ``shard_size`` examples. Exclusive with ``num_shards``.
            Must be a positive int.
        verbose (bool, optional): If False, do not print progress updates during saving.
        bar (:class:`progress.bar.Bar`, optional): Progress bar class

    Returns:
        The ``path`` that was passed in.

    Raises:
        ValueError: If both ``num_shards`` and ``shard_size`` are given, if either is
            not positive, or if ``shard_size`` is omitted for a dataset without ``__len__``.
    """
    _check_h5py()
    if num_shards is not None and shard_size is not None:
        raise ValueError("num_shards is incompatible with shard_size, please use one or the other")
    if num_shards is not None and num_shards <= 0:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")
    if shard_size is not None and shard_size <= 0:
        raise ValueError(f"shard_size must be >= 1, got {shard_size}")
    if shard_size is None and not hasattr(dataset, "__len__"):
        raise ValueError("shard_size is required for datasets with no len() method")

    # calculate num shards / shard size
    # NOTE(review): both divisions floor, so when len(dataset) is not an exact
    # multiple the trailing examples fall outside every slice built below --
    # confirm whether a final partial shard should be written.
    if num_shards is None and shard_size is None:
        num_shards = 1
        shard_size = len(dataset)
    elif num_shards is not None:
        num_shards = int(num_shards)
        shard_size = len(dataset) // num_shards
    elif shard_size is not None:
        shard_size = int(shard_size)
        num_shards = len(dataset) // shard_size

    # write shards
    files = set()
    if num_shards == 1:
        f = _write_shard(path, iter(dataset), shard_size, verbose=verbose)
        files.add(f)
    else:
        # Show the bar only when progress output was requested (the test was
        # previously inverted).  It advances once per shard, so its maximum
        # is the shard count.  A separate name keeps the `bar` class
        # parameter intact.
        progress = bar(f"Writing to {path}", max=num_shards) if verbose else None

        # slice dataset iterator for multi-sharding
        slices = [(x * shard_size, (x + 1) * shard_size) for x in range(num_shards)]
        for shard_index, (low, high) in enumerate(slices, start=1):
            data = itertools.islice(iter(dataset), low, high)
            # per-shard output stays quiet; the bar above reports progress
            f = _write_shard(path, data, shard_size, shard_index, verbose=False)
            files.add(f)
            if progress is not None:
                progress.next()
        if progress is not None:
            progress.finish()

    _finalize_master(dataset, path, files)
    return path
Ejemplo n.º 37
0
    def fit(self, train_domain, num_epochs, patience, optimizer, train_dir,
            dev_dir):
        """
        Trains the model.

        After every epoch the model is evaluated on the development data; it
        is saved whenever the dev accuracy improves, and training stops early
        (reloading the saved model) once ``patience`` consecutive epochs have
        brought no improvement.

        :param train_domain: the domain used for training
        :param num_epochs: the max number of epochs the model should be trained
        :param patience: the patience to use for early stopping
        :param optimizer: the optimizer that should be used
        :param train_dir: the directory containing the training files
        :param dev_dir: the directory containing the development files
        :raises ValueError: if ``optimizer`` is neither SGD nor ADAM
        :raises NotImplementedError: if a training instance carries a task
            that is not in the supported list
        """

        def mean_or_nan(total, count):
            # A task without any predicted instance has no meaningful mean;
            # report nan instead of raising ZeroDivisionError.
            return total / count if count else float('nan')

        print("Reading training data from %s..." % train_dir, flush=True)
        train_X, train_Y, _, _, word2id, char2id, task2t2i = get_data(
            [train_domain], self.task_names, data_dir=train_dir, train=True)

        # get the development data of the same domain
        dev_X, dev_Y, _, _, _, _, _ = get_data([train_domain],
                                               self.task_names,
                                               word2id,
                                               char2id,
                                               task2t2i,
                                               data_dir=dev_dir,
                                               train=False)
        print('Length of training data:', len(train_X), flush=True)
        print('Length of validation data:', len(dev_X), flush=True)

        # store mappings of words and tags to indices
        self.set_indices(word2id, char2id, task2t2i)
        num_words = len(self.word2id)
        num_chars = len(self.char2id)

        print('Building the computation graph...', flush=True)
        self.predictors, self.char_rnn, self.wembeds, self.cembeds = \
            self.build_computation_graph(num_words, num_chars)

        if optimizer == SGD:
            trainer = dynet.SimpleSGDTrainer(self.model)
        elif optimizer == ADAM:
            trainer = dynet.AdamTrainer(self.model)
        else:
            raise ValueError('%s is not a valid optimizer.' % optimizer)

        train_data = list(zip(train_X, train_Y))

        # tasks that predict() can be trained on; built once, not per instance
        supported_tasks = (POS, NER, SENTI2, SENTI3, REX, NFIX, FFD, TRT, MFD,
                           EEG_T, EEG_A, EEG_B, EEG_G, FIXP, FREQ, ALL)

        num_epochs_no_improvement = 0
        best_dev_acc = 0

        print('Training model with %s for %d epochs and patience of %d.' %
              (optimizer, num_epochs, patience))
        for epoch in range(num_epochs):
            print('', flush=True)
            bar = Bar('Training epoch %d/%d...' % (epoch + 1, num_epochs),
                      max=len(train_data),
                      flush=True)

            # keep track of the # of updates, total loss, and total # of
            # predicted instances per task
            task2num_updates = {task: 0 for task in self.task_names}
            task2total_loss = {task: 0.0 for task in self.task_names}
            task2total_predicted = {task: 0.0 for task in self.task_names}
            total_loss = 0.0
            total_penalty = 0.0
            total_predicted = 0.0
            # setting seed for shuffling the data
            random.seed(123)
            random.shuffle(train_data)

            # for every instance, we optimize the loss of the corresponding task
            for (word_indices, char_indices), task2label_id_seq in train_data:
                # get the concatenated word and char-based features for every
                # word in the sequence
                features = self.get_word_char_features(word_indices,
                                                       char_indices)
                for task, y in task2label_id_seq.items():
                    if task not in supported_tasks:
                        raise NotImplementedError('Task %s has not been '
                                                  'implemented yet.' % task)
                    output, penalty = self.predict(features, task, train=True)
                    loss = dynet.esum([
                        pick_neg_log(pred, gold)
                        for pred, gold in zip(output, y)
                    ])
                    lv = loss.value()
                    # sum the loss and the subspace constraint penalty
                    combined_loss = loss + self.constraint_weight_param * penalty
                    total_loss += lv
                    total_penalty += penalty.value()
                    total_predicted += len(output)
                    task2total_loss[task] += lv
                    task2total_predicted[task] += len(output)
                    task2num_updates[task] += 1

                    # back-propagate through the combined loss
                    combined_loss.backward()
                    trainer.update()
                bar.next()

            # close the epoch's progress bar (it was previously left open)
            bar.finish()

            print(
                "\nEpoch %d. Total loss: %.3f. Total penalty: %.3f. Losses: " %
                (epoch, mean_or_nan(total_loss, total_predicted),
                 mean_or_nan(total_penalty, total_predicted)),
                end='',
                flush=True)
            for task in task2total_loss:
                print(
                    '%s: %.3f. ' %
                    (task,
                     mean_or_nan(task2total_loss[task],
                                 task2total_predicted[task])),
                    end='',
                    flush=True)
            print('', flush=True)

            # evaluate after every epoch
            dev_acc = self.evaluate(dev_X, dev_Y)

            if dev_acc > best_dev_acc:
                print('Main task %s dev acc %.4f is greater than best dev acc '
                      '%.4f...' % (self.main_task, dev_acc, best_dev_acc),
                      flush=True)
                best_dev_acc = dev_acc
                num_epochs_no_improvement = 0
                print('Saving model to directory %s...' % self.model_dir,
                      flush=True)
                self.save()
            else:
                print('Main task %s dev acc %.4f is lower than best dev acc '
                      '%.4f...' % (self.main_task, dev_acc, best_dev_acc),
                      flush=True)
                num_epochs_no_improvement += 1
            if num_epochs_no_improvement == patience:
                print('Early stopping...', flush=True)
                print('Loading the best performing model from %s...' %
                      self.model_dir,
                      flush=True)

                self.model.populate(self.model_file)
                break
Ejemplo n.º 38
0
def recursivecell(
        levels=3,
        init=None,
        tEnd=100,
        dt=0.001,
        couplcoeff=0.3,
        drive=0.5,
        plots=True,
        plots2=True,
        parallel=True,  # whether the video is rendered in parallel or not
        vidja=True,
        scalefactor=0.8,
        step=10,
        width=1080,
        height=1080,
        dpi=100):  # video parameters
    """Simulate the phases of a recursively coupled cell network and save the results.

    The phases are integrated with a forward-Euler step
    ``phase += dt * (couplcoeff * (mod(-C @ phase + pi, 2*pi) - pi) + drive)``
    where ``C`` is the matrix returned by ``n_matrixgen(levels, 0.25)``.
    The full phase history is written as CSV to ``outputs/recursive/`` and,
    depending on the flags, PNG plots and a video are produced as well.

    Args:
        levels: Passed to ``cellamount`` (number of cells) and ``n_matrixgen``.
        init: Optional initial phases; reshaped to ``(cellnr, 1)``. When
            ``None`` every phase is drawn uniformly from ``[0, 2*pi)``.
        tEnd: Simulated end time.
        dt: Integration time step.
        couplcoeff: Factor applied to the coupling term.
        drive: Constant drive added to every cell's derivative.
        plots: Save the combined phase and sine plots.
        plots2: Save the per-cell phase-plane and sine plots.
        parallel: Use ``parallelvideo`` instead of ``videomaker``.
        vidja: Render a video at all.
        scalefactor, step, width, height, dpi: Forwarded unchanged to the
            video function.
    """
    # finding amount of cells
    cellnr = cellamount(levels)

    # random start vector unless initial phases were supplied
    if init is None:
        startvect = np.empty((cellnr, 1))
        for i in range(cellnr):
            startvect[i, 0] = random.random() * 2 * np.pi
    else:
        startvect = np.reshape(np.array(init), (cellnr, 1))

    # coupling matrix
    totcoupl = n_matrixgen(levels, 0.25)
    # Column vector on purpose: a flat (cellnr,) drive would broadcast the
    # (cellnr, 1) state into a (cellnr, cellnr) matrix on the first step.
    drivevect = np.ones((cellnr, 1)) * drive
    print(totcoupl)

    # Use one integer step count everywhere so the history buffer, the
    # integration loop and the time axis can never disagree because of
    # floating-point rounding in tEnd / dt.
    nsteps = int(round(tEnd / dt))

    # simulation setup
    phasevect = startvect
    phasearray = np.empty((cellnr, nsteps + 1))  # one column per time step
    phasearray[:, 0] = phasevect[:, 0]

    bar1 = Bar('simulating', max=nsteps)

    # actually simulating
    for stepcounter in range(1, nsteps + 1):
        dphase = couplcoeff * (
            np.mod(-np.matmul(totcoupl, phasevect) + np.pi, np.pi * 2) -
            np.pi) + drivevect  # calculating derivative
        phasevect = phasevect + dt * dphase
        phasearray[:, stepcounter] = phasevect[:, 0]
        bar1.next()
    bar1.finish()

    # finding time for filenames
    now = datetime.now()

    # transposing so every column is one cell (the plots index it that way)
    phasearray = np.transpose(phasearray)

    # dumping the whole thing to a csv for later use
    np.savetxt(
        'outputs/recursive/{now}drive{drive}coupl{coup}.csv'.format(
            now=now, drive=drive, coup=couplcoeff), phasearray)

    # time axis with exactly one entry per stored sample
    tarr = np.arange(nsteps + 1) * dt

    if plots:
        # plot phase stuff
        vp.set_palette('cool')
        plt.figure(figsize=(15, 10))
        for i in range(cellnr):
            plt.plot(tarr, np.mod(phasearray[:, i], 2 * np.pi))
        plt.savefig('outputs/recursive/phase_{}.png'.format(now))
        plt.close()
        # plot sine
        plt.figure(figsize=(15, 5))
        for i in range(cellnr):
            plt.plot(tarr, np.sin(phasearray[:, i]))

        plt.savefig('outputs/recursive/sine_{}.png'.format(now))
        plt.close()
    # plots that are more useful for bigger systems
    if plots2:
        cool = vp.palette('cool')  # let's make them pretty
        plotbar = Bar('plotting...', max=cellnr)
        fig = plt.figure(figsize=(15, 5 * math.ceil(cellnr / 3)))
        for i in range(cellnr):
            data = phasearray[:, i]
            sins = np.sin(data)
            diffs = centerdif(sins, dt)

            # phase space
            ax = fig.add_subplot(int(math.ceil(cellnr / 3)), 3, i + 1)
            ax.plot(sins, diffs, c=cool[np.mod(i, len(cool))])
            ax.set_xlabel('x{}'.format(i + 1))
            ax.set_ylabel("x'{}".format(i + 1))
            ax.set_ylim(top=1, bottom=-1)
            plotbar.next()
        plt.savefig('outputs/recursive/{}phaseplane.png'.format(now))
        plotbar.finish()
        plt.close()
        # sine plots
        fig2 = plt.figure(figsize=(15, 5 * math.ceil(cellnr / 3)))
        plotbar = Bar('plotting...', max=cellnr)
        for i in range(cellnr):
            data = phasearray[:, i]
            sins = np.sin(data)
            ax2 = fig2.add_subplot(cellnr, 1, i + 1)
            ax2.plot(tarr, sins, c=cool[np.mod(i, len(cool))])
            ax2.set_xlabel('t')
            ax2.set_ylabel("x{}".format(i + 1))
            plotbar.next()
        plt.savefig('outputs/recursive/{}sins.png'.format(now))
        plt.close()
        plotbar.finish()

    # Video stuff
    if vidja == True:
        if parallel == True:
            parallelvideo(cellnr, levels, tEnd, dt, phasearray, totcoupl, now,
                          scalefactor, step, width, height, dpi)
        else:
            videomaker(cellnr, levels, tEnd, dt, phasearray, totcoupl, now,
                       scalefactor, step, width, height, dpi)
Ejemplo n.º 39
0
def test(testloader, model, criterion, epoch, use_cuda):
    """Segment every sequence in ``testloader`` and write the predicted masks.

    For each batch the first sequence is taken, the model is run frame by
    frame (memorize the previous frame, then segment the current one), and
    the stacked per-frame probabilities are handed to ``write_mask``.

    ``criterion`` and ``epoch`` are accepted but not used in this function.
    """
    elapsed_meter = AverageMeter()

    bar = Bar('Processing', max=len(testloader))

    with torch.no_grad():
        for batch_idx, (frames, masks, objs, infos) in enumerate(testloader):
            if use_cuda:
                frames = frames.cuda()
                masks = masks.cuda()

            # only the first sequence of the batch is processed
            frames, masks = frames[0], masks[0]
            num_objects, info = objs[0], infos[0]
            max_obj = masks.shape[1] - 1

            started = time.time()

            num_frames, _, _, _ = frames.shape
            pred = [masks[0:1]]
            keys, vals = [], []
            for t in range(1, num_frames):
                prev = t - 1
                if prev == 0:
                    # the first frame always uses the provided mask
                    prev_mask = masks[0:1]
                elif 'frame' in info and prev in info['frame']:
                    # start frame: a provided mask exists for it
                    mask_id = info['frame'].index(prev)
                    prev_mask = masks[mask_id:mask_id + 1]
                    num_objects = max(num_objects, prev_mask.max())
                else:
                    # otherwise feed back the previous prediction
                    prev_mask = probs

                # memorize the previous frame
                key, val, _ = model(frame=frames[prev:t],
                                    mask=prev_mask,
                                    num_objects=num_objects)

                # segment the current frame against memory + latest entry
                logits, _ = model(frame=frames[t:t + 1],
                                  keys=torch.cat(keys + [key], dim=1),
                                  values=torch.cat(vals + [val], dim=1),
                                  num_objects=num_objects,
                                  max_obj=max_obj)

                probs = torch.softmax(logits, dim=1)
                pred.append(probs)

                # only every save_freq-th frame is kept in memory
                if prev % opt.save_freq == 0:
                    keys.append(key)
                    vals.append(val)

            pred = torch.cat(pred, dim=0).detach().cpu().numpy()
            write_mask(pred, info, opt, directory=opt.output_dir)

            elapsed_meter.update(time.time() - started, 1)

            # plot progress
            bar.suffix = '({batch}/{size}) Time: {data:.3f}s'.format(
                batch=batch_idx + 1,
                size=len(testloader),
                data=elapsed_meter.sum)
            bar.next()
        bar.finish()
Ejemplo n.º 40
0
import time

from progress.bar import Bar  #pip install progress

# Minimal progress-bar demo: 20 ticks, 50 ms apart, with a custom suffix
# showing index/max, percentage and ETA.
bar = Bar(
    'Processing',
    max=20,
    suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds',
)
for _ in range(20):
    time.sleep(.05)
    bar.next()
bar.finish()
def trunco_check(path_tranco_list, path_names):
    """Write ``tranco_inter_campus.csv`` flagging which campus domains are in Tranco.

    Every name from ``path_names`` is written as one CSV row
    ``domain,tranco`` where ``tranco`` is ``'1'`` if the name appears in the
    Tranco list and ``'0'`` otherwise. An existing output file is removed
    first.

    Args:
        path_tranco_list: Path to the Tranco list (top1m). Each line is
            either a bare domain or ``rank,domain``; only the part after the
            last comma is used.
        path_names: Path to a text file with one domain name per line.
    """
    # Labels removed from campus names; the first one found wins, matching
    # the original www8. -> www2. -> www. precedence.
    www_labels = ('www8.', 'www2.', 'www.')

    # Extract domain names from the tranco list. Lines are stripped (the raw
    # lines end in '\n', which made every comparison below fail) and stored
    # in a set so each lookup is O(1) instead of a scan over the whole list.
    tranco_dom = set()
    with open(path_tranco_list) as tranco_csv:
        for line in tranco_csv:
            entry = line.strip()
            if entry:
                tranco_dom.add(entry.rsplit(',', 1)[-1])

    # Extract domains from the campus list, removing 'www.', 'www8.',
    # 'www2.' from the names.
    campus_dom = []
    with open(path_names) as campus_txt:
        for raw_line in campus_txt:
            had_newline = '\n' in raw_line
            name = raw_line.strip('\n')
            for label in www_labels:
                if label in name:
                    name = name.replace(label, '')
                    break
            else:
                # no known www-style label in this name
                if had_newline:
                    print('[debug]probably no new case')
                else:
                    print('[debug]2 probably no new case ')
            campus_dom.append(name)

    if os.path.exists('tranco_inter_campus.csv'):
        print('Removingn old csv')
        os.remove('tranco_inter_campus.csv')
    else:
        print('No old csv exists')

    header = ['domain', 'tranco']
    print('CSV Creation')

    bar_csv = Bar('Csv creation', max=len(campus_dom), fill='~')
    # newline='' is what the csv module expects; without it extra blank
    # rows appear on platforms that translate line endings.
    with open('tranco_inter_campus.csv', mode='a', newline='') as csv_out:
        writer = csv.writer(csv_out)
        writer.writerow(header)
        for name in campus_dom:
            writer.writerow([str(name), '1' if name in tranco_dom else '0'])
            bar_csv.next()
    bar_csv.finish()
Ejemplo n.º 42
0
def main():
    """Build a motion heatmap from ``input.mp4``.

    Every frame after the first is background-subtracted, thresholded and
    accumulated. Per-frame overlays are written to ``./frames/`` and turned
    into ``./output.avi`` by ``make_video``; the final accumulated heatmap is
    blended over the first frame and saved as ``diff-overlay.jpg``.
    """
    capture = cv2.VideoCapture('input.mp4')
    background_subtractor = cv2.createBackgroundSubtractorMOG2()
    length = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

    bar = Bar('Processing Frame', max=length)

    first_frame = None
    accum_image = None
    for i in range(0, length):
        ret, frame = capture.read()
        if not ret:
            # read() failed, so `frame` is not a usable image; stop instead
            # of crashing on frame.shape / apply() below.
            break

        if first_frame is None:
            # first frame: remember it and set up the accumulator
            first_frame = copy.deepcopy(frame)
            height, width = frame.shape[:2]
            accum_image = np.zeros((height, width), np.uint8)
        else:
            foreground_mask = background_subtractor.apply(frame)
            cv2.imwrite('./frame.jpg', frame)
            cv2.imwrite('./diff-bkgnd-frame.jpg', foreground_mask)

            threshold = 2
            maxValue = 2
            _, th1 = cv2.threshold(foreground_mask, threshold, maxValue,
                                   cv2.THRESH_BINARY)

            # add to the accumulated image
            accum_image = cv2.add(accum_image, th1)
            cv2.imwrite('./mask.jpg', accum_image)

            color_image_video = cv2.applyColorMap(accum_image,
                                                  cv2.COLORMAP_SUMMER)

            video_frame = cv2.addWeighted(frame, 0.7, color_image_video, 0.7,
                                          0)

            name = "./frames/frame%d.jpg" % i
            print(name)
            cv2.imwrite(name, video_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        bar.next()

    bar.finish()

    if first_frame is None:
        # nothing could be read, so there is nothing to render; just clean up
        capture.release()
        cv2.destroyAllWindows()
        return

    make_video('./frames/', './output.avi')

    color_image = cv2.applyColorMap(accum_image, cv2.COLORMAP_HOT)
    result_overlay = cv2.addWeighted(first_frame, 0.7, color_image, 0.7, 0)

    # save the final heatmap
    cv2.imwrite('diff-overlay.jpg', result_overlay)

    # cleanup
    capture.release()
    cv2.destroyAllWindows()
Ejemplo n.º 43
0
    arg.add_argument("-d",
                     "--destination",
                     help="Specify the directory to output final image to.",
                     required=True)
    arg.add_argument("-n",
                     "--num_tiles",
                     help="Max number of tiles to solve.",
                     required=False)
    args = arg.parse_args()
    cm = cachemap(source=args.source, dest=args.destination)

    if os.path.isdir(args.source):  # if source is valid, proceed
        image_list = []
        max = len(os.listdir(
            args.source))  # number of images in dir (starting at 1)
        bar = Bar("[+] Reading in images from %s" % (args.source), max=max)
        for image in os.listdir(args.source):
            if image.endswith(".bmp"):
                # add image to array
                image_list.append(Image.open(os.path.join(args.source, image)))
                bar.next()
    else:
        sys.stderr.write(
            "Invalid -s/--source path %s. Use -h/--help for help" %
            (os.linesep))
        exit(-1)

#     for img in images:
#         if cm.read_bmp(img):
#             cm.genetic_algo()
#             cm.export_sol()
Ejemplo n.º 44
0
def run(filename, split, level, dred, dbrown, debug):
    """Split a slide into ``split`` x ``split`` tiles and sum red/brown pixels.

    Each tile is read from the slide at ``level``, passed through
    ``bgsa.get_red`` / ``bgsa.get_brown`` and saved (tile plus both masks)
    under ``output/``. The white-pixel counts of the masks are accumulated.

    Args:
        filename: Path of the slide opened with ``OpenSlide``.
        split: Number of tiles per axis (an int; it is used with ``range``).
        level: Slide level to read tiles from.
        dred: Brightness offset for the red filter (negated before use).
        dbrown: Brightness offset for the brown filter (negated before use).
        debug: When true, print parameters, per-tile info, progress and totals.

    Returns:
        dict with the accumulated counts under ``"red"`` and ``"brown"``.
    """
    # Create OpenSlide object
    ndpi = OpenSlide(filename)
    ndpi_width = ndpi.dimensions[0]
    ndpi_height = ndpi.dimensions[1]
    total_width = ndpi.level_dimensions[level][0]
    total_height = ndpi.level_dimensions[level][1]

    red_sum = 0.0
    brown_sum = 0.0

    startTime = time.time()

    # print(...) with a single argument behaves the same on Python 2 and 3
    if debug:
        print("filename: {}".format(filename))
        print("split: {}".format(split))
        print("level: {}".format(level))
        print("dred: {}".format(dred))
        print("dbrown: {}".format(dbrown))
        print("debug: {}".format(debug))

    if debug:
        print("================ START =================")
        print("LOAD {}".format(filename))
        print("width:".ljust(20) + str(ndpi_width))
        print("height:".ljust(20) + str(ndpi_height))
        print("level count:".ljust(20) + str(ndpi.level_count))
        print("split image {}x{} , level:{} , factor:{}".format(
            total_width, total_height, level, split))

    bar = Bar('Processing', max=split**2)
    for i in range(split):
        for j in range(split):
            # Floor division keeps coordinates and sizes integral on
            # Python 3 as well (identical to `/` on ints under Python 2).
            x = i * ndpi_width // split
            y = j * ndpi_height // split
            w = total_width // split
            h = total_height // split

            if debug:
                print("\n>SLICE [{}][{}]".format(i, j))
                print("x:{:3} y:{:3} w:{:3}px h:{:3}px:".format(x, y, w, h))

            region = ndpi.read_region((x, y), level, (w, h))
            red = bgsa.get_red(region, brightness=-dred)
            brown = bgsa.get_brown(region, brightness=-dbrown)

            region.save("output/normal_slice{}{}.png".format(i, j))
            red.save("output/red_slice_{}{}.png".format(i, j))
            brown.save("output/brown_slice_{}{}.png".format(i, j))

            red_sum += bgsa.get_white_pixels(red)
            brown_sum += bgsa.get_white_pixels(brown)

            # the progress bar is only driven in debug mode
            if debug:
                bar.next()

    if debug:
        bar.finish()
        print("Finished....in {:.2f} sec".format(time.time() - startTime))
        print("total red   :".ljust(20) + str(red_sum))
        print("total brown :".ljust(20) + str(brown_sum))

    return {"red": red_sum, "brown": brown_sum}
Ejemplo n.º 45
0
import mne.io
import pandas as pd
from progress.bar import Bar

if __name__ == '__main__':
    path_to_egg_data = "../real_EEG_data/"
    time_segment = 180  # seconds - 3 min
    time_list, list_start, list_stop, shape = [], [], [], []

    bar = Bar("Processing the files", max=31)

    # read all the .edf files
    for file in range(1, 32):
        raw_fname = path_to_egg_data + str(file) + '.edf'
        raw = mne.io.read_raw_edf(raw_fname, preload=True)
        data, times = raw[-1, -1:]

        total_time = float(times)
        time_list.append(total_time)

        # Splitting the data into chunks of 3 minutes
        t_start = 0
        t_end = t_start + time_segment
        segment = 0
        while t_end < total_time:
            start, stop = raw.time_as_index([t_start, t_end])
            list_start.append(start)
            list_stop.append(stop)

            data = raw.get_data(start=start, stop=stop)
            df = pd.DataFrame(data,
Ejemplo n.º 46
0
    def infer(self):
        """Run the model over the test dataloader and store data and outputs in HDF5.

        For every batch the (optionally subgrid-split) data is fed to
        ``self.model``; both the batch data and the model output are appended
        to the ``inference/data`` group of ``self.results_hdf5_file``. The
        loop is profiled and the profiler table / chrome trace are written to
        ``self.task.logdir``.

        Raises:
            NotImplementedError: If the task type is neither
                ``SUPERVISED_LEARNING`` nor ``INFERENCE``.
        """
        hdf5_group_prefix = f"/task_{self.task.uid}/inference"
        data_hdf5_group = self.results_hdf5_file.create_group(
            f"/{hdf5_group_prefix}/data")

        self.model.eval()
        if self.task.type in [
                TaskTypeEnum.SUPERVISED_LEARNING, TaskTypeEnum.INFERENCE
        ]:
            dataloader = self.task.labeled_dataloader.dataloaders['test']
        else:
            raise NotImplementedError(
                f"The following task type is not implemented: {self.task.type}"
            )

        dataloader_meta_info = DataloaderMetaInfo(dataloader)

        subgrid_size = self.task.labeled_dataloader.config.get("subgrid_size")

        with torch.no_grad(), profiler.profile() as prof:
            start_idx = 0
            progress_bar = Bar(f"Inference for task {self.task.uid}",
                               max=len(dataloader))
            for batch_idx, data in enumerate(dataloader):
                data = self.dict_to_device(data)
                batch_size = data[ChannelEnum.OCC_DEM].size(0)
                grid_size = list(data[ChannelEnum.OCC_DEM].size()[1:3])

                # keep the unsplit batch so it can be restored after inference
                grid_data = data
                if subgrid_size is not None:
                    data = self.split_subgrids(subgrid_size, data)

                with profiler.record_function("model_inference"):
                    output = self.model(data)

                if subgrid_size is not None:
                    # max occlusion ratio threshold for COMP_DEM where we accept reconstruction
                    # instead of just taking all OCC_DEM
                    subgrid_max_occ_ratio_thresh = self.task.config.get(
                        "subgrid_max_occ_ratio_thresh", 1.0)
                    if subgrid_max_occ_ratio_thresh < 1.0:
                        occ_dem = data[ChannelEnum.OCC_DEM]
                        # fraction of NaN cells per subgrid
                        occ_ratio = torch.isnan(occ_dem).sum(
                            dim=(1, 2)) / (occ_dem.size(1) * occ_dem.size(2))
                        occ_ratio_selector = occ_ratio > subgrid_max_occ_ratio_thresh

                        # subgrids above the threshold fall back to the input
                        comp_dem = output[ChannelEnum.COMP_DEM]
                        comp_dem[occ_ratio_selector, :, :] = occ_dem[
                            occ_ratio_selector, :, :]
                        output[ChannelEnum.COMP_DEM] = comp_dem

                        if ChannelEnum.OCC_DATA_UM in data and ChannelEnum.COMP_DATA_UM in output:
                            # Read the occluded channel from `data` (the dict
                            # the guard checks), mirroring the DEM handling.
                            occ_data_um = data[ChannelEnum.OCC_DATA_UM]
                            comp_data_um = output[ChannelEnum.COMP_DATA_UM]
                            comp_data_um[
                                occ_ratio_selector, :, :] = occ_data_um[
                                    occ_ratio_selector, :, :]
                            # store the patched UM channel, not the DEM
                            output[ChannelEnum.COMP_DATA_UM] = comp_data_um

                    output = self.unsplit_subgrids(grid_size, output)
                    data = grid_data

                self.add_batch_data_to_hdf5_results(
                    data_hdf5_group, data, start_idx,
                    dataloader_meta_info.length)
                self.add_batch_data_to_hdf5_results(
                    data_hdf5_group, output, start_idx,
                    dataloader_meta_info.length)

                start_idx += batch_size
                progress_bar.next()
            progress_bar.finish()

        with open(str(self.task.logdir / "inference_cputime.txt"), "a") as f:
            f.write(prof.key_averages().table(sort_by="cpu_time_total",
                                              row_limit=20))
        prof.export_chrome_trace(
            str(self.task.logdir / "inference_cputime_chrome_trace.json"))
Ejemplo n.º 47
0
def generate_onnx_representation(pretrained_version=None, model=None):
    """Exports a given huggingface pretrained model, or a given model and tokenizer, to onnx

    Three graphs are exported: the decoder that consumes past key values, the
    encoder, and the initial decoder that produces past key values.

    Args:
        pretrained_version (str): Name of a pretrained model, or path to a pretrained / finetuned version of T5
        model: Optional in-memory model; when given it is split with
            ``turn_model_into_encoder_decoder`` instead of loading
            ``pretrained_version``.
            NOTE(review): the tokenizer, config and output paths are still
            derived from ``pretrained_version`` in this case - confirm callers
            always pass it together with ``model``.

    Returns:
        ``(encoder_path, decoder_path, init_decoder_path)``, or ``None`` when
        neither ``pretrained_version`` nor ``model`` is given.
    """
    import warnings

    if (pretrained_version is None) and model is None:
        print(
            "You need to specify pretrained_version (the pretrained model you wish to export). Alternatively you can export a model you have in memory."
        )

        return
    if model is not None:
        (
            simplified_encoder,
            decoder_with_lm_head,
            decoder_with_lm_head_init,
        ) = turn_model_into_encoder_decoder(model)
    else:
        (
            simplified_encoder,
            decoder_with_lm_head,
            decoder_with_lm_head_init,
        ) = create_t5_encoder_decoder(pretrained_version)

    # model paths for enc, dec and dec_init
    encoder_path, decoder_path, init_decoder_path = get_model_paths(
        pretrained_version, saved_models_path, quantized=False
    )

    tokenizer = AutoTokenizer.from_pretrained(pretrained_version)

    sample_input = "translate English to French: The universe is a dark forest."
    model_inputs = tokenizer(sample_input, return_tensors="pt")
    model_config = AutoConfig.from_pretrained(pretrained_version)

    input_ids = model_inputs["input_ids"]
    attention_mask = model_inputs["attention_mask"]

    # dummy inputs
    batch_size = 5
    n_heads = model_config.num_heads
    seq_length_a, seq_length_b = input_ids.shape
    d_kv = model_config.d_kv

    # all dummy decoder inputs share the same batch dimension
    input_ids_dec = torch.ones((batch_size, 1), dtype=torch.int64)
    attention_mask_dec = torch.ones((batch_size, seq_length_b), dtype=torch.int64)
    enc_out = torch.ones(
        (batch_size, seq_length_b, model_config.d_model), dtype=torch.float32
    )

    # one block of dummy past key values per decoder layer
    a = torch.ones(
        (batch_size, n_heads, seq_length_a, d_kv), dtype=torch.float32
    )
    b = torch.ones(
        (batch_size, n_heads, seq_length_b, d_kv), dtype=torch.float32
    )
    t5_block = (a, a, b, b)
    past_key_values = (t5_block,) * model_config.num_decoder_layers

    # flatten the per-layer tuples into one list of tensors
    flat_past_key_values = functools.reduce(operator.iconcat, past_key_values, [])

    decoder_all_inputs = tuple(
        [input_ids_dec, attention_mask_dec, enc_out] + flat_past_key_values
    )

    num_of_inputs = 4 * model_config.num_decoder_layers

    # for progress bars
    bar = Bar("Exporting to onnx...", max=3)

    # Ignore warnings during conversion only: catch_warnings() restores the
    # previous filters on exit, so the rest of the process is not silenced.
    with warnings.catch_warnings(), torch.no_grad():
        warnings.filterwarnings("ignore")

        decoder_inputs = [
            "input_ids",
            "encoder_attention_mask",
            "encoder_hidden_states",
        ]

        pkv_input_names = ["input_{}".format(i) for i in range(0, num_of_inputs)]

        decoder_input_names = decoder_inputs + pkv_input_names

        decoder_output_names = ["logits", "output_past_key_values"]

        dyn_axis = {
            "input_ids": {0: "batch", 1: "sequence"},
            "encoder_attention_mask": {0: "batch", 1: "sequence"},
            "encoder_hidden_states": {0: "batch", 1: "sequence"},
            "logits": {0: "batch", 1: "sequence"},
            "output_past_key_values": {0: "batch", 1: "sequence"},
        }

        dyn_pkv = {
            "input_{}".format(i): {0: "batch", 1: "n_head", 2: "seq_length", 3: "d_kv"}
            for i in range(0, num_of_inputs)
        }

        dyn_axis_params = {**dyn_axis, **dyn_pkv}

        # decoder to utilize past key values:
        torch.onnx.export(
            decoder_with_lm_head,
            decoder_all_inputs,
            decoder_path.as_posix(),
            export_params=True,
            do_constant_folding=True,
            opset_version=12,
            input_names=decoder_input_names,
            output_names=decoder_output_names,
            dynamic_axes=dyn_axis_params,
        )
        bar.next()

        torch.onnx.export(
            simplified_encoder,
            args=(input_ids, attention_mask),
            f=encoder_path.as_posix(),
            export_params=True,
            opset_version=12,
            do_constant_folding=True,
            input_names=["input_ids", "attention_mask"],
            output_names=["hidden_states"],
            dynamic_axes={
                "input_ids": {0: "batch", 1: "sequence"},
                "attention_mask": {0: "batch", 1: "sequence"},
                "encoder_hidden_states": {0: "batch", 1: "sequence"},
                "hidden_states": {0: "batch", 1: "sequence"},
            },
        )
        bar.next()
        # initial decoder to produce past key values
        torch.onnx.export(
            decoder_with_lm_head_init,
            (input_ids_dec, attention_mask_dec, enc_out),
            init_decoder_path.as_posix(),
            export_params=True,
            opset_version=12,
            input_names=[
                "input_ids",
                "encoder_attention_mask",
                "encoder_hidden_states",
            ],
            output_names=["logits", "past_key_values"],
            dynamic_axes={
                # batch_size, seq_length = input_shape
                "input_ids": {0: "batch", 1: "sequence"},
                "encoder_hidden_states": {0: "batch", 1: "sequence"},
                "logits": {0: "batch", 1: "sequence"},
                "past_key_values": {0: "batch", 1: "sequence"},
                "encoder_attention_mask": {0: "batch", 1: "sequence"},
            },
        )
        bar.next()
        bar.finish()

    return encoder_path, decoder_path, init_decoder_path
Ejemplo n.º 48
0
def verify_producer_performance(with_dr_cb=True):
    """ Time how long it takes to produce and deliver X messages

    Produces ``msgcnt`` messages of ``msgsize`` bytes to ``topic``, retrying
    on local-queue backpressure, then flushes and prints throughput figures
    for the produce phase and for delivery.

    Args:
        with_dr_cb: When true, ``dr.delivery`` is passed as the delivery
            callback; otherwise the delivered counts are filled in from the
            produced counts.
    """
    conf = {'bootstrap.servers': bootstrap_servers,
            'api.version.request': api_version_request,
            'error_cb': error_cb}

    p = confluent_kafka.Producer(**conf)

    msgcnt = 1000000
    msgsize = 100
    msg_pattern = 'test.py performance'
    # Repeat the pattern often enough (ceiling division) that the slice
    # really yields msgsize bytes; flooring left the payload shorter than
    # the size the statistics below are computed from.
    repeats = -(-msgsize // len(msg_pattern))
    msg_payload = (msg_pattern * repeats)[0:msgsize]

    dr = MyTestDr(silent=True)

    t_produce_start = time.time()
    msgs_produced = 0
    msgs_backpressure = 0
    print('# producing %d messages to topic %s' % (msgcnt, topic))

    if with_progress:
        bar = Bar('Producing', max=msgcnt)
    else:
        bar = None

    for i in range(0, msgcnt):
        # retry until the message is accepted into the local queue
        while True:
            try:
                if with_dr_cb:
                    p.produce(topic, value=msg_payload, callback=dr.delivery)
                else:
                    p.produce(topic, value=msg_payload)
                break
            except BufferError:
                # Local queue is full (slow broker connection?)
                msgs_backpressure += 1
                if bar is not None and (msgs_backpressure % 1000) == 0:
                    bar.next(n=0)
                p.poll(100)

        msgs_produced += 1
        # advance the bar only once a full batch of 5000 has been produced
        if bar is not None and (msgs_produced % 5000) == 0:
            bar.next(n=5000)
        p.poll(0)

    t_produce_spent = time.time() - t_produce_start

    bytecnt = msgs_produced * msgsize

    if bar is not None:
        bar.finish()

    print('# producing %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' %
          (msgs_produced, bytecnt / (1024*1024), t_produce_spent,
           msgs_produced / t_produce_spent,
           (bytecnt/t_produce_spent) / (1024*1024)))
    print('# %d temporary produce() failures due to backpressure (local queue full)' % msgs_backpressure)

    print('waiting for %d/%d deliveries' % (len(p), msgs_produced))
    # Wait for deliveries
    p.flush()
    # measured from the start of producing, so it includes the produce phase
    t_delivery_spent = time.time() - t_produce_start

    print('# producing %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' %
          (msgs_produced, bytecnt / (1024*1024), t_produce_spent,
           msgs_produced / t_produce_spent,
           (bytecnt/t_produce_spent) / (1024*1024)))

    # Fake numbers if not using a dr_cb
    if not with_dr_cb:
        print('# not using dr_cb')
        dr.msgs_delivered = msgs_produced
        dr.bytes_delivered = bytecnt

    print('# delivering %d messages (%.2fMb) took %.3fs: %d msgs/s, %.2f Mb/s' %
          (dr.msgs_delivered, dr.bytes_delivered / (1024*1024), t_delivery_spent,
           dr.msgs_delivered / t_delivery_spent,
           (dr.bytes_delivered/t_delivery_spent) / (1024*1024)))
    print('# post-produce delivery wait took %.3fs' %
          (t_delivery_spent - t_produce_spent))
Ejemplo n.º 49
0
def save_torch(
    dataset: Dataset,
    path: str,
    prefix: Union[str, Callable[[int, Any], str]] = "example_",
    verbose: bool = True,
    bar: Bar = _DefaultBar,
) -> None:
    r"""Writes every example of the dataset to its own file via :func:`torch.save`.

    .. note::
        Not as elegant as HDF5 serialization, but a thread safe alternative.

    Args:
        dataset (Dataset): The dataset whose examples are saved.

        path (str): Target directory, created if missing. Ex ``foo/bar``.

        prefix (str or callable):
            A string prefix for each ``.pth`` file, or a callable that builds
            the prefix from the example index and the example itself.
            With a string prefix the example index is appended automatically,
            giving ``{path}/{prefix}{index}.pth``; with a callable prefix the
            returned string is used as is, giving ``{path}/{prefix}.pth``.

        verbose (bool, optional): If False, no progress updates are printed.

        bar (:class:`progress.bar.Bar`, optional): Progress bar class

    Raises:
        ValueError: If a callable ``prefix`` returns something other than a str.

    .. Example:
        >>> str_prefix = "example_"
        >>> save_torch(ds, path="root", prefix=str_prefix)
        >>> # creates files root/example_{index}.pth
        >>>
        >>> callable_prefix = lambda pos, example: f"class_{example[1].item()}/example_{pos}"
        >>> save_torch(ds, path="root", prefix=callable_prefix)
        >>> # creates files root/class_{label_id}/example_{index}.pth

    """
    path = Path(path)
    path.mkdir(parents=True, exist_ok=True)

    # Sized datasets get a real progress bar, unsized ones a spinner.
    if not verbose:
        progress = None
    elif hasattr(dataset, "__len__"):
        progress = bar(f"Writing to {path}", max=len(dataset))
    else:
        progress = Spinner(f"Writing to {path}")

    has_str_prefix = isinstance(prefix, str)

    for i, example in enumerate(dataset):
        if has_str_prefix:
            stem = f"{prefix}{i}"
        else:
            stem = prefix(i, example)
            if not isinstance(stem, str):
                raise ValueError(f"Callable `prefix` must return a str, got {type(stem)}")
        target = Path(path, f"{stem}.pth")

        # a callable prefix may point into a subdirectory
        target.parent.mkdir(parents=True, exist_ok=True)
        torch.save(example, target)
        if progress is not None:
            progress.next()

    if progress is not None:
        progress.finish()
Ejemplo n.º 50
0
def modulate_fm(x,
                fsBB,
                fsIF,
                del_f=75000,
                BB_BW=15000,
                BW=200000,
                A=1,
                debug=False,
                preemph=True,
                fc=0,
                progress=False):
    '''
    Frequency-modulate the signal x with maximum frequency deviation del_f.

    Processing steps, in order:
      1. Low-pass x to BB_BW with a 65-tap remez FIR filter.
      2. Resample to fsIF when fsBB and fsIF differ.
      3. Optionally apply a pre-emphasis FIR filter (2.1 kHz .. 30 kHz).
      4. FM-modulate around fc, then subtract the mean (DC) of the result.
      5. Band-limit to BW around fc with a 165-tap remez FIR filter.
      6. Normalise the result to a peak magnitude of 1.

    x: The signal to modulate, a 1D np array
    fsBB: The sample rate of the signal x
    fsIF: The sample rate for the modulation
    del_f: delta f, the maximum frequency deviation
    BB_BW: Bandwidth the input is low-pass filtered to before modulation
    BW: The final maximum bandwidth of the signal
    A: The amplitude of the fm modulated signal (before final normalisation)
    debug: If True, plot the spectrum of each stage with spec_plot
    preemph: If True, apply the pre-emphasis filter
    fc: The centre frequency for the modulation
    progress: If True, show a progress bar (6 steps)
    Returns: (fm_modIF, kf) -- the normalised fm modulated signal and the
      modulation constant kf = 2*pi*del_f / mp, where mp is the maximum of
      the filtered (and resampled) message.
    '''
    #Convert everything to float...
    fsBB = float(fsBB)
    fsIF = float(fsIF)
    del_f = float(del_f)
    BW = float(BW)
    A = float(A)

    if progress:
        bar = Bar('FM Modulating ...', max=6)
        bar.next()

    #Band-limit the message to BB_BW
    taps = 65
    right_edge = BB_BW / fsBB
    b = remez(taps, [0, right_edge * .95, right_edge * .97, 0.5], [1, 0],
              type='bandpass',
              maxiter=100,
              grid_density=32)
    a = 1
    BB = lfilter(b, a, x)
    if progress:
        bar.next()

    if debug == True:
        fig = plt.figure()
        spec_plot(BB, fsBB, fig, sub_plot=(2, 2, 1), plt_title='BB')

    #Perform the modulation, as well as upsampling to fsIF
    T = len(BB) / fsBB  #The period of time x exists for
    #The number of samples for the RF modulation.  fsIF * T is a float, but
    #resample() needs an integer sample count, so round and cast.
    N = int(round(fsIF * T))
    if not fsBB == fsIF:
        BB = resample(BB, N)
    mp = max(BB)
    kf = (2. * pi * del_f) / mp
    if progress:
        bar.next()

    #Preemphasis filtering
    if preemph is True:
        taps = 65
        f1 = 2100.
        f2 = 30000.
        G = f2 / f1
        b = remez(taps, [0, f1 / fsIF, f2 / fsIF, 0.5], [1, G],
                  type='bandpass',
                  maxiter=100,
                  grid_density=32)
        a = 1
        BB = lfilter(b, a, BB)

        if debug == True:
            spec_plot(BB,
                      fsIF,
                      fig,
                      sub_plot=(2, 2, 2),
                      plt_title='Preemphasized BB')

    if progress:
        bar.next()
    #FM modulation
    t = linspace(0, T, len(BB))
    BB_integral = cumtrapz(BB, dx=1. / len(BB), initial=0.)
    fm_modIF = A * cos(2 * pi * fc * t + kf * BB_integral)

    #Remove the DC component
    DC = np.average(fm_modIF)
    fm_modIF = fm_modIF - DC

    if debug == True:
        spec_plot(fm_modIF,
                  fsIF,
                  fig,
                  sub_plot=(2, 2, 3),
                  plt_title='Modulated')
    if progress:
        bar.next()

    #Bandwidth limiting
    left_edge = (fc - (BW / 2.)) / fsIF
    right_edge = (fc + (BW / 2.)) / fsIF
    taps = 165
    if left_edge <= 0:
        # NOTE(review): the previous code assigned an all-pass design
        # (bands=[0, 0.5], gains=[1]) here when right_edge == 0.5 and then
        # unconditionally overwrote it with the low-pass design below, so
        # that branch never took effect.  The effective (low-pass) behaviour
        # is kept; confirm whether an all-pass case is actually wanted.
        bands = [0, right_edge * .97, right_edge * .99, 0.5]
        gains = [1, 0]
    elif right_edge == 0.5:
        bands = [0, left_edge * 1.01, left_edge * 1.03, 0.5]
        gains = [0, 1]
    else:
        bands = [
            0, left_edge * 1.01, left_edge * 1.05, right_edge * .95,
            right_edge * .99, 0.5
        ]
        gains = [0, 1, 0]

    b = remez(taps,
              bands,
              gains,
              type='bandpass',
              maxiter=1000,
              grid_density=32)
    a = 1
    fm_modIF = lfilter(b, a, fm_modIF)
    if progress:
        bar.next()

    if debug == True:
        spec_plot(fm_modIF,
                  fsIF,
                  fig,
                  sub_plot=(2, 2, 4),
                  plt_title='Transmitted')
        fig.show()

    mx = max(max(fm_modIF), abs(min(fm_modIF)))
    fm_modIF = fm_modIF / mx  #normalize to 1
    if progress:
        bar.finish()
    return fm_modIF, kf
Ejemplo n.º 51
0
def train(trainloader, model, criterion, optimizer, epoch, use_cuda, iter_size,
          mode, threshold):
    """Run one pass over ``trainloader`` with gradient accumulation.

    For every batch the model is called once; its output at position
    ``t - 1`` is scored against the mask of frame ``t`` (for ``t >= 1``)
    using ``criterion(pred, gt, num_objects)``.  The per-batch loss is the
    mean over all (sample, frame) pairs.  Gradients are accumulated and the
    optimizer steps every ``iter_size`` batches.

    ``epoch``, ``mode`` and ``threshold`` are accepted but not used here.
    This function does not itself switch the model to training mode.

    Returns:
        The running average of the recorded (strictly positive) batch losses.
    """
    load_meter = AverageMeter()
    loss_meter = AverageMeter()

    tick = time.time()

    bar = Bar('Processing', max=len(trainloader))
    optimizer.zero_grad()

    for step, (frames, masks, objs, infos) in enumerate(trainloader, start=1):
        # time spent waiting for this batch
        load_meter.update(time.time() - tick)

        if use_cuda:
            frames, masks, objs = frames.cuda(), masks.cuda(), objs.cuda()

        # an object count of zero is replaced by one
        objs[objs == 0] = 1

        batch_size, num_frames, _, _, _ = frames.size()
        max_obj = masks.shape[2] - 1  # computed as before; not used below

        out = model(frame=frames, mask=masks, num_objects=objs)

        accumulated = 0.0
        for sample in range(batch_size):
            for t in range(1, num_frames):
                accumulated = accumulated + criterion(
                    out[sample, t - 1:t],
                    masks[sample, t:t + 1],
                    objs[sample].item())

        mean_loss = accumulated / (batch_size * (num_frames - 1))

        # only strictly positive losses enter the running average
        loss_value = mean_loss.item()
        if loss_value > 0.0:
            loss_meter.update(loss_value, 1)

        # scale by the accumulation length before back-propagating
        (mean_loss / iter_size).backward()

        if step % iter_size == 0:
            optimizer.step()
            model.zero_grad()

        # restart the data-loading clock
        tick = time.time()

        bar.suffix = '({batch}/{size}) Data: {data:.3f}s |Loss: {loss:.5f}'.format(
            batch=step,
            size=len(trainloader),
            data=load_meter.val,
            loss=loss_meter.avg)
        bar.next()
    bar.finish()

    return loss_meter.avg
Ejemplo n.º 52
0
def parallelvideo(cellnr,
                  levels,
                  tEnd,
                  dt,
                  phasearray,
                  couplmatrix,
                  current_time,
                  scalefactor=0.8,
                  step=10,
                  width=1080,
                  height=1080,
                  dpi=100):
    """Render ``tEnd * step`` frames in worker processes and write an AVI.

    Frame indices are put on a work queue, ``maxprocesses`` worker processes
    running ``frame_queuer`` consume them, and this process pulls finished
    frames from the ``mats`` queue and appends them to
    ``outputs/recursive/{current_time}.avi`` (MP42, 30 fps,
    ``width`` x ``height``) in the order they arrive.

    Args:
        cellnr, dt, phasearray, couplmatrix, dpi: Forwarded unchanged to
            ``frame_queuer``.
        levels, scalefactor: Passed to ``coordhelper`` to compute the
            drawing coordinates.
        tEnd, step: Their product is the number of frames to produce.
        current_time: Used as the output file name (without extension).
        width, height: Frame size in pixels.
    """
    # Local import: multiprocessing queues raise queue.Empty on get() timeout.
    from queue import Empty

    print('finding coordinates...')
    locs = coordhelper(levels,
                       scalefactor)  #dividing a circle into even intervals
    print('coords done')
    cmap = mpl.cm.get_cmap('seismic')  #color map
    # create OpenCV video writer
    video = cv2.VideoWriter('outputs/recursive/{}.avi'.format(current_time),
                            cv2.VideoWriter_fourcc(*'MP42'), 30,
                            (width, height))

    total_frames = tEnd * step

    #start parallelization
    framequeue = mp.Queue()  #empty queue
    mats = mp.JoinableQueue()  #for outputs
    frames = mp.Queue()  #to keep track of what frame we're at
    frames.put(-1)  #the first frame will be 0 and will check for framenum -1
    processlist = []  #so we can wait for everything to finish
    maxprocesses = 12  #maybe increase this later?
    bar2 = Bar('creating video', max=total_frames)
    for i in range(total_frames):  #creating a queue
        framequeue.put(i)

    # generating frames
    for i in range(maxprocesses):
        #we create a process which calls the frame queuer, these all go through the
        #queue and call the framemaker, which makes it so we can have multiple
        #framemakers going
        p = mp.Process(target=frame_queuer,
                       args=(
                           framequeue,
                           mats,
                           frames,
                           cellnr,
                           locs,
                           dt,
                           step,
                           couplmatrix,
                           phasearray,
                           width,
                           height,
                           dpi,
                           cmap,
                       ))
        processlist.append(p)
        p.start()

    #we loop until we've added as many frames as needed
    framesadded = 0
    while framesadded < total_frames:
        try:
            newframe = mats.get(timeout=1)
        except Empty:
            # No frame finished within the timeout: keep polling.  Only the
            # timeout is swallowed, so Ctrl-C and real errors still surface
            # instead of being silently ignored.
            continue
        mats.task_done()
        video.write(newframe)
        framesadded += 1
        bar2.next()

    while not frames.empty():
        frames.get()  #flushing to allow the processes to close
    for p in processlist:  #shut down all the processes
        p.join()
        p.close()

    # close video writer
    bar2.finish()
    cv2.destroyAllWindows()
    video.release()
Ejemplo n.º 53
0
    def test(self):
        """Run the model over the test dataloader and persist the results.

        For every batch the input data, the model output and the per-sample
        loss dictionary are written into HDF5 groups under
        ``/task_{uid}/test``; the aggregated loss is reported to
        ``self.task.loss``.  Inference runs under ``torch.no_grad()`` and the
        profiler; afterwards the profiler table is appended to
        ``test_cputime.txt`` and a Chrome trace is exported into the task's
        log directory.

        Raises:
            NotImplementedError: if the task type is not supervised learning.
        """
        group_root = f"/task_{self.task.uid}/test"
        data_group = self.results_hdf5_file.create_group(
            f"/{group_root}/data")
        loss_group = self.results_hdf5_file.create_group(
            f"/{group_root}/loss")

        # Optional post-processing of the model output.
        assessment_config = self.task.config.get("traversability_assessment",
                                                 {})
        if assessment_config.get("active", False):
            traversability_assessment = TraversabilityAssessment(
                **assessment_config)
        else:
            traversability_assessment = None

        self.model.eval()

        if self.task.type != TaskTypeEnum.SUPERVISED_LEARNING:
            raise NotImplementedError(
                f"The following task type is not implemented: {self.task.type}"
            )
        dataloader = self.task.labeled_dataloader.dataloaders['test']

        meta_info = DataloaderMetaInfo(dataloader)
        epoch_context = self.task.loss.new_epoch(
            0, "test", dataloader_meta_info=meta_info)
        with epoch_context, torch.no_grad(), profiler.profile() as prof:
            # Offset of the current batch's first sample in the HDF5 datasets.
            write_offset = 0
            progress_bar = Bar(f"Test inference for task {self.task.uid}",
                               max=len(dataloader))
            for batch in dataloader:
                batch = self.dict_to_device(batch)
                batch_size = batch[ChannelEnum.GT_DEM].size(0)

                with profiler.record_function("model_inference"):
                    output = self.model(batch)

                if traversability_assessment is not None:
                    output = traversability_assessment(output=output,
                                                       data=batch)

                # Inputs and outputs share the same HDF5 group.
                for payload in (batch, output):
                    self.add_batch_data_to_hdf5_results(
                        data_group, payload, write_offset, meta_info.length)

                loss_dict = self.model.loss_function(
                    loss_config=self.task.config["loss"],
                    output=output,
                    data=batch,
                    dataloader_meta_info=meta_info,
                    reduction="mean_per_sample")
                self.task.loss(
                    batch_size=batch_size,
                    loss_dict=self.task.loss.aggregate_mean_loss_dict(
                        loss_dict))
                self.add_batch_data_to_hdf5_results(
                    loss_group, loss_dict, write_offset, meta_info.length)

                write_offset += batch_size
                progress_bar.next()

            progress_bar.finish()

        # Persist profiler results next to the other task logs.
        cpu_table = prof.key_averages().table(sort_by="cpu_time_total",
                                              row_limit=20)
        with open(str(self.task.logdir / "test_cputime.txt"), "a") as f:
            f.write(cpu_table)
        prof.export_chrome_trace(
            str(self.task.logdir / "test_cputime_chrome_trace.json"))
        Max = 0
        for j in self_active_neighbor:
            probability = self.active_neighbor[j]
            other_probability = A[j].active_neighbor[self_index]
            utility = probability * other_probability
            if utility >= Max:
                Max = utility
                chosen_agent = j
                chosen_agent_index = i
            i += 1
        return chosen_agent, chosen_agent_index


# =============================================================================

# Module-level progress bar labelled 'Processing' with a declared total of 14 steps.
bar = Bar('Processing', max=14)

# Run parameters; N, T and version all become part of the output directory name.
T = 5000
N = 100
version = '99.01.15_3 long_term off'  #XXX

# Path separator for each supported platform key.
pd = {'win32': '\\', 'linux': '/'}
if sys.platform.startswith('win32'):
    plat = 'win32'
else:
    # Every non-Windows platform (Linux, macOS, BSD, ...) uses the POSIX
    # separator.  Previously `plat` was only set for win32 and linux, so any
    # other platform hit a NameError when `path` was built below.
    plat = 'linux'
current_path = os.getcwd()
# <cwd>/runned_files/N{N}_T{T}/{version}/ (with a trailing separator)
path = current_path + pd[plat] + 'runned_files' + pd[plat] + 'N%d_T%d' % (
    N, T) + pd[plat] + version + pd[plat]

# with open(path + 'Initials.txt','r') as initf:
Ejemplo n.º 55
0
}

# Map each tile size to the colour at the same position in color_array.
tile_to_colors = {
    tile: color_array[slot] for slot, tile in enumerate((4, 8, 16, 32))
}

# Matrix sizes to benchmark: the powers of two from 2**5 up to 2**10.
sizes = [1 << exponent for exponent in range(5, 11)]

# Optional cuBLAS run (disabled by default): collect one value per size
# and plot the resulting curve.
with_cublas = False
if with_cublas:
    with Bar('CUBLAS...    ', max=len(sizes)) as bar:
        cublas = []
        for size in sizes:
            config.matrix_size = size
            config.kernel_type = krnl.KERNEL_CUBLAS

            time = interface.run(config)
            cublas.append(compute_glops(time, config))
            bar.next()

        ax.plot(
            sizes,
            cublas,
            color='green',
            marker=markers[krnl.KERNEL_CUBLAS],
            linestyle='solid',
            label=interface.kernel_type_to_str(config.kernel_type),
        )
Ejemplo n.º 56
0
    a for b in [[d + x for x in os.listdir(d)] for d in wdir_left] for a in b
][starting_point:cutoff]
# Paths of all right-hand images: every entry of every directory in
# wdir_right (directory string + file name), restricted to the
# [starting_point:cutoff] window.
raw_images_right = [
    d + x for d in wdir_right for x in os.listdir(d)
][starting_point:cutoff]

# Directory that contains this script.
wkdir = os.path.dirname(os.path.realpath(__file__))

# Size every image is resized to (height, width) and the channel count.
h = w = 512
channels = 3

# Images are processed in chunks, which will be saved and compressed as .npz
chunk_length = 1000

# One progress step per left-hand image.
bar = Bar('Processing', max=len(raw_images_left))

# Walk the left-hand image list in slices of chunk_length paths.
for chunk_index, chunk in enumerate([
        raw_images_left[i:i + chunk_length]
        for i in range(0, len(raw_images_left), chunk_length)
]):
    x_data, y_data = [], []
    # Position of this chunk's first image within the full image lists.
    chunk_offset = chunk_index * chunk_length
    for index, raw_image in enumerate(chunk):
        bar.next()
        xy = []

        image_left = Image.open(raw_image).resize((w, h))
        # `index` is relative to the current chunk, so the chunk offset must
        # be added to pick the right-hand image that belongs to this
        # left-hand image; without it every chunk after the first would be
        # paired with the first chunk's right-hand images.
        image_right = Image.open(
            raw_images_right[chunk_offset + index]).resize((w, h))

        # Scale pixel values by 1/127.5 and shift by -1
        # (maps 0..255 to -1..1, assuming 8-bit channels).
        image_left_array = np.array(image_left) / 127.5 - 1
        image_right_array = np.array(image_right) / 127.5 - 1
Ejemplo n.º 57
0
    def learn(self):
        """
        Performs num_iters iterations with num_eps episodes of self-play.

        Each iteration: (optionally) collect self-play samples, trim and back
        up the sample history, train the network on the shuffled history, and
        pit the new network against the previous one in an arena.  The new
        network is kept only if its share of the decisive games reaches
        config.update_threshold; otherwise the previous weights are restored.
        """

        for iteration in range(1, self.config.num_iters + 1):
            print("------iteration " + str(iteration) + "------")

            # Self-play is skipped only on the first iteration, and only when
            # skip_first_self_play is set.
            if iteration > 1 or not self.skip_first_self_play:
                fresh_samples = deque([], maxlen=self.config.max_queue_length)

                episode_timer = AverageMeter()
                bar = Bar("Self Play", max=self.config.num_eps)
                last_tick = time.time()

                for episode in range(self.config.num_eps):
                    # every episode starts from a fresh search tree
                    self.mcts = MCTS(self.neural_net, self.config)
                    fresh_samples += self.run_episode()

                    episode_timer.update(time.time() - last_tick)
                    last_tick = time.time()
                    bar.suffix = "({ep}/{max_ep}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}".format(
                        ep=episode + 1,
                        max_ep=self.config.num_eps,
                        et=episode_timer.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td,
                    )
                    bar.next()
                bar.finish()

                # keep this iteration's samples in the history
                self.train_samples_history.append(fresh_samples)

            history_limit = self.config.num_iters_for_train_samples_history
            if len(self.train_samples_history) > history_limit:
                print(
                    "len(train_samples_history) =",
                    len(self.train_samples_history),
                    " => remove the oldest train_samples",
                )
                self.train_samples_history.pop(0)

            # Back up the history.  The samples were produced by the model of
            # the previous iteration, hence the (iteration - 1) index.
            self.save_train_samples(iteration - 1)

            # flatten the history into one list and shuffle it for training
            train_samples = [
                sample
                for iteration_samples in self.train_samples_history
                for sample in iteration_samples
            ]
            shuffle(train_samples)

            # Snapshot the current weights so the competitor (and a possible
            # rollback) can use them.
            self.neural_net.save_checkpoint(
                folder=self.config.checkpoint, filename="temp.pth.tar"
            )
            self.competitor_nn.load_checkpoint(
                folder=self.config.checkpoint, filename="temp.pth.tar"
            )
            previous_mcts = MCTS(self.competitor_nn, self.config)

            self.neural_net.train_from_samples(train_samples)
            new_mcts = MCTS(self.neural_net, self.config)

            print("battling against previous version")
            arena = Arena(
                lambda x: np.argmax(previous_mcts.get_move_probabilities(x, temp=0)),
                lambda x: np.argmax(new_mcts.get_move_probabilities(x, temp=0)),
            )
            prev_wins, new_wins, draws = arena.play_games(self.config.arena_compare)

            print("new/prev wins : %d / %d ; draws : %d" % (new_wins, prev_wins, draws))

            decisive_games = prev_wins + new_wins
            rejected = (
                decisive_games == 0
                or float(new_wins) / decisive_games < self.config.update_threshold
            )
            if not rejected:
                print("accepting new model")
                self.neural_net.save_checkpoint(
                    folder=self.config.checkpoint,
                    filename=self.get_checkpoint_file(iteration),
                )
                self.neural_net.save_checkpoint(
                    folder=self.config.checkpoint, filename="best.pth.tar"
                )
            else:
                print("rejecting new model")
                self.neural_net.load_checkpoint(
                    folder=self.config.checkpoint, filename="temp.pth.tar"
                )
def validation(model, val_loader, epoch, writer):
    """Evaluate ``model`` on ``val_loader`` and log the results to ``writer``.

    Three model outputs are scored: the first against ``target`` (part
    labels), the second against ``hlabel`` and the third against ``flabel``.
    For each, pixel accuracy and mean IoU (from an accumulated confusion
    matrix) are tracked and shown in the progress bar.  Every 50th batch,
    image / label / prediction grids are written to ``writer``.

    Returns:
        ``(pixAcc, mIoU)`` for the first output, with mIoU in percent
        rounded to two decimals.
    """
    # set evaluate mode
    model.eval()

    def upsample(logits, size):
        # bilinear resize of the logits to the label resolution
        return F.interpolate(input=logits,
                             size=size,
                             mode='bilinear',
                             align_corners=True)

    def mean_iou(confusion):
        # mean IoU in percent, ignoring NaN classes
        return round(np.nanmean(per_class_iu(confusion)) * 100, 2)

    def as_grid(images):
        # NHWC numpy batch -> NCHW tensor -> image grid
        return torchvision.utils.make_grid(
            torch.from_numpy(images.transpose(0, 3, 1, 2)))

    total_correct, total_label = 0, 0
    total_correct_hb, total_label_hb = 0, 0
    total_correct_fb, total_label_fb = 0, 0
    hist = np.zeros((args.num_classes, args.num_classes))
    hist_hb = np.zeros((args.hbody_cls, args.hbody_cls))
    hist_fb = np.zeros((args.fbody_cls, args.fbody_cls))

    num_batches = len(val_loader)
    bar = Bar('Processing {}'.format('val'), max=num_batches)
    bar.check_tty = False
    for idx, batch in enumerate(val_loader):
        image, target, hlabel, flabel, _ = batch
        image, target, hlabel, flabel = (image.cuda(), target.cuda(),
                                         hlabel.cuda(), flabel.cuda())
        with torch.no_grad():
            label_size = (target.size(1), target.size(2))
            outputs = gather(model(image), 0, dim=0)
            preds = upsample(outputs[0][-1], label_size)
            preds_hb = upsample(outputs[1][-1], label_size)
            preds_fb = upsample(outputs[2][-1], label_size)

            if idx % 50 == 0:
                global_step = epoch * num_batches + idx + 1
                img_vis = inv_preprocess(image, num_images=args.save_num)
                label_vis = decode_predictions(target.int(),
                                               num_images=args.save_num,
                                               num_classes=args.num_classes)
                pred_vis = decode_predictions(torch.argmax(preds, dim=1),
                                              num_images=args.save_num,
                                              num_classes=args.num_classes)

                # visual grids
                img_grid = as_grid(img_vis)
                label_grid = as_grid(label_vis)
                pred_grid = as_grid(pred_vis)
                writer.add_image('val_images', img_grid, global_step)
                writer.add_image('val_labels', label_grid, global_step)
                writer.add_image('val_preds', pred_grid, global_step)

            # per-batch pixel counts
            correct, labeled = batch_pix_accuracy(preds.data, target)
            correct_hb, labeled_hb = batch_pix_accuracy(preds_hb.data, hlabel)
            correct_fb, labeled_fb = batch_pix_accuracy(preds_fb.data, flabel)

            # confusion matrices for mIoU
            hist += fast_hist(preds, target, args.num_classes)
            hist_hb += fast_hist(preds_hb, hlabel, args.hbody_cls)
            hist_fb += fast_hist(preds_fb, flabel, args.fbody_cls)

            total_correct += correct
            total_correct_hb += correct_hb
            total_correct_fb += correct_fb
            total_label += labeled
            total_label_hb += labeled_hb
            total_label_fb += labeled_fb

            # running metrics; np.spacing(1) guards against division by zero
            pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
            IoU = mean_iou(hist)
            pixAcc_hb = 1.0 * total_correct_hb / (np.spacing(1) +
                                                  total_label_hb)
            IoU_hb = mean_iou(hist_hb)
            pixAcc_fb = 1.0 * total_correct_fb / (np.spacing(1) +
                                                  total_label_fb)
            IoU_fb = mean_iou(hist_fb)

            # plot progress
            suffix_template = ('{} / {} | pixAcc: {pixAcc:.4f}, mIoU: {IoU:.4f} |'
                               'pixAcc_hb: {pixAcc_hb:.4f}, mIoU_hb: {IoU_hb:.4f} |'
                               'pixAcc_fb: {pixAcc_fb:.4f}, mIoU_fb: {IoU_fb:.4f}')
            bar.suffix = suffix_template.format(idx + 1, num_batches,
                                                pixAcc=pixAcc, IoU=IoU,
                                                pixAcc_hb=pixAcc_hb, IoU_hb=IoU_hb,
                                                pixAcc_fb=pixAcc_fb, IoU_fb=IoU_fb)
            bar.next()

    print('\n per class iou part: {}'.format(per_class_iu(hist) * 100))
    print('per class iou hb: {}'.format(per_class_iu(hist_hb) * 100))
    print('per class iou fb: {}'.format(per_class_iu(hist_fb) * 100))

    mIoU = mean_iou(hist)
    mIoU_hb = mean_iou(hist_hb)
    mIoU_fb = mean_iou(hist_fb)

    for tag, value in (('val_pixAcc', pixAcc),
                       ('val_mIoU', mIoU),
                       ('val_pixAcc_hb', pixAcc_hb),
                       ('val_mIoU_hb', mIoU_hb),
                       ('val_pixAcc_fb', pixAcc_fb),
                       ('val_mIoU_fb', mIoU_fb)):
        writer.add_scalar(tag, value, epoch)
    bar.finish()

    return pixAcc, mIoU
Ejemplo n.º 59
0
def verify_batch_consumer_performance():
    """ Verify batch Consumer performance

    Subscribes to the module-level ``topic`` with a fresh group id, pulls
    messages in batches of 1000 via ``consume()`` until at least 1,000,000
    have been counted, then prints message and byte throughput measured from
    the arrival of the first message.  Partition-EOF events are ignored; any
    other message error raises ``KafkaException``.
    """

    def log_partitions(event, partitions):
        # shared logging for the rebalance callbacks
        print(event, len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))

    def my_on_assign(consumer, partitions):
        log_partitions('on_assign:', partitions)
        consumer.assign(partitions)

    def my_on_revoke(consumer, partitions):
        log_partitions('on_revoke:', partitions)
        consumer.unassign()

    topic_conf = {'auto.offset.reset': 'earliest'}
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': uuid.uuid1(),
        'session.timeout.ms': 6000,
        'error_cb': error_cb,
        'default.topic.config': topic_conf,
    }

    c = confluent_kafka.Consumer(**conf)
    c.subscribe([topic], on_assign=my_on_assign, on_revoke=my_on_revoke)

    max_msgcnt = 1000000
    batch_size = 1000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    bar = None
    if with_progress:
        bar = Bar('Consuming', max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')

    # Consume until we hit max_msgcnt
    while msgcnt < max_msgcnt:
        for msg in c.consume(num_messages=batch_size, timeout=20.0):
            err = msg.error()
            if err:
                if err.code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    # Reached EOF for a partition, ignore.
                    continue
                raise confluent_kafka.KafkaException(err)

            bytecnt += len(msg)
            msgcnt += 1

            if msgcnt == 1:
                # throughput is measured from the first message onwards
                t_first_msg = time.time()

            # advance the bar in steps of 10000 messages
            if bar is not None and (msgcnt % 10000) == 0:
                bar.next(n=10000)

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        mbyte = 1024*1024
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / mbyte, t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / mbyte))

    print('closing consumer')
    c.close()
Ejemplo n.º 60
0
def verify_stats_cb():
    """ Verify stats_cb

    Polls messages from the module-level ``topic`` with statistics enabled
    (every 200 ms) until the statistics callback reports a positive
    ``app_offset`` for partition 0 of the topic, or until 1,000,000 messages
    have been consumed.  Raises if no message arrives within 20 seconds.
    """

    def stats_cb(stats_json_str):
        global good_stats_cb_result
        stats_json = json.loads(stats_json_str)
        if topic not in stats_json['topics']:
            return
        app_offset = stats_json['topics'][topic]['partitions']['0']['app_offset']
        if app_offset > 0:
            print("# app_offset stats for topic %s partition 0: %d" %
                  (topic, app_offset))
            good_stats_cb_result = True

    topic_conf = {'auto.offset.reset': 'earliest'}
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': uuid.uuid1(),
        'session.timeout.ms': 6000,
        'error_cb': error_cb,
        'stats_cb': stats_cb,
        'statistics.interval.ms': 200,
        'default.topic.config': topic_conf,
    }

    c = confluent_kafka.Consumer(**conf)
    c.subscribe([topic])

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    bar = None
    if with_progress:
        bar = Bar('Consuming', max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')

    # Consume until the stats callback succeeds or the message cap is hit
    while not good_stats_cb_result and msgcnt < max_msgcnt:
        msg = c.poll(timeout=20.0)
        if msg is None:
            raise Exception('Stalled at %d/%d message, no new messages for 20s' %
                            (msgcnt, max_msgcnt))

        err = msg.error()
        if err:
            if err.code() == confluent_kafka.KafkaError._PARTITION_EOF:
                # Reached EOF for a partition, ignore.
                continue
            raise confluent_kafka.KafkaException(err)

        bytecnt += len(msg)
        msgcnt += 1

        if msgcnt == 1:
            # throughput is measured from the first message onwards
            t_first_msg = time.time()

        # advance the bar in steps of 10000 messages
        if bar is not None and (msgcnt % 10000) == 0:
            bar.next(n=10000)

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        mbyte = 1024*1024
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / mbyte, t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / mbyte))

    print('closing consumer')
    c.close()