Example #1
def inicializa_banco():
    # DELETE ALL
    # User.objects.delete({})

    if User.objects().first() is None:
        print("### ADICIONANDO CLIENTES A BASE DE DADOS ###")
        with open('clientes.json') as f:
            file_data = json.load(f)
        bar = pyprind.ProgBar(len(file_data), bar_char='█', stream=sys.stdout)
        for client in file_data:
            bar.update()
            User(nome=str(client['nome']), cpf=str(client['cpf']), celular=str(client['celular']),
                 score=int(client['score']), negativado=bool(client['negativado'])).save()

    else:
        print("### USANDO BASE DE DADOS EXISTENTE ###")
Example #2
    def get_movie_vectors(self):

        user_count = self.db.users.count()
        print("Preparing movie vectors ( user count: %d )" % user_count)
        movies = {}

        my_prbar = pyprind.ProgBar(user_count)

        for user in self.db.users.find():
            for movie in user['ratings']:
                if movie not in movies:
                    movies[movie] = {}
                movies[movie][user['id']] = user['ratings'][movie]['rating']
            # one bar tick per user, matching the user_count given to ProgBar
            my_prbar.update()
        return movies
Example #3
def make_table(csv_in, csv_out):
    df = pd.read_csv(csv_in, sep=',')
    df['Lyrics'] = pd.Series('', index=df.index)

    progress_bar = None
    if df.shape[0] > 1:
        progress_bar = pp.ProgBar(df.shape[0])

    for row in df.index:
        song = Song(artist=df.at[row, df.columns[0]],
                    title=df.at[row, df.columns[1]])
        lyr = song.lyricwikia()
        # .at replaces the removed .ix, whose chained assignment could
        # silently fail to write back
        df.at[row, df.columns[2]] = lyr
        if progress_bar:
            progress_bar.update()
    df.to_csv(csv_out)
Example #4
    def train_direction(self, img):
        self.weights = defaultdict(lambda: defaultdict(Counter))
        width, height = img.size
        img = np.array(img)[:, :, :3]
        prog = pyprind.ProgBar(width * height, width=64, stream=1)
        for x in range(height):
            for y in range(width):
                pix = tuple(self.normalize(img[x, y]))
                prog.update()
                for dir, neighbour in self.get_neighbours_dir(x, y).items():
                    try:
                        self.weights[pix][dir][tuple(
                            self.normalize(img[neighbour]))] += 1
                    except IndexError:
                        continue
        self.directional = True
Example #5
    def add_new_wheels_in_chunks(connection, wheels, total_wheels):
        """
        Method used to add Wheel Pros wheels
        :param connection: Shopify connection used to add the wheels
        :param wheels: wheels to add, grouped into sections
        :param total_wheels: total wheels to be added
        :return: Nothing
        """
        # TODO: Need to test this method

        bar = pyprind.ProgBar(total_wheels, monitor=True, update_interval=.1)
        total_added = 1
        # This is for each of the sections
        sections_done = 1
        for i in range(len(wheels)):
            j = 0
            # a while loop lets a failed index be retried; decrementing the
            # loop variable of a `for` loop has no effect in Python
            while j < len(wheels[i]):
                try:
                    time.sleep(0.25)
                    w = wheels[i][j]
                    ShopifyToolsWheels.add_new_wheel(connection, w)
                    bar_graph_string = "Section: " + str(sections_done)
                    bar_graph_string += " - Index: " + str(total_added)
                    bar.update(item_id=bar_graph_string)
                    total_added += 1
                    j += 1
                except pyactiveresource.connection.Error:
                    print("Internet is out, retrying in 10 seconds")
                    time.sleep(10)
                except TimeoutError:
                    print("Timeout error has occurred, retrying in 10 seconds")
                    time.sleep(10)
                except HTTPError:
                    print("HTTP error has occurred, retrying in 10 seconds")
                    time.sleep(10)
            sections_done += 1

        print(bar)
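
The retry-until-success pattern above recurs whenever a flaky network call sits inside a progress loop. It can be factored into a small helper; a minimal sketch, assuming the retried call is safe to repeat (retry_call and its parameters are illustrative names, not part of any library used on this page):

import time

def retry_call(func, *args, retryable=(TimeoutError,), delay=10, **kwargs):
    """Call func until it succeeds, sleeping `delay` seconds after each
    retryable failure; any other exception propagates normally."""
    while True:
        try:
            return func(*args, **kwargs)
        except retryable as exc:
            print("%s, retrying in %d seconds" % (exc, delay))
            time.sleep(delay)

With such a helper, the body of the while loop above reduces to one retry_call(ShopifyToolsWheels.add_new_wheel, connection, w) followed by the bar.update() bookkeeping.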
Example #6
def _block_candset_split(c_df, l_df, r_df, l_key, r_key, fk_ltable, fk_rtable,
                         black_box_function_pkl, show_progress):

    # initialize the progress bar
    if show_progress:
        bar = pyprind.ProgBar(len(c_df))

    # create lookup dictionaries for faster processing
    l_dict = {}
    r_dict = {}

    # list to keep track of valid ids
    valid = []

    # find positions of the ID attributes of the two tables in the candset
    l_id_pos = list(c_df.columns).index(fk_ltable)
    r_id_pos = list(c_df.columns).index(fk_rtable)

    # unpickle the black box function
    black_box_function = pickle.loads(black_box_function_pkl)

    # iterate candidate set
    for row in c_df.itertuples(index=False):
        # # update progress bar
        if show_progress:
            bar.update()
        
        # # get ltuple, try dictionary first, then dataframe
        row_lkey = row[l_id_pos]
        if row_lkey not in l_dict:
            l_dict[row_lkey] = l_df.loc[row_lkey]
        ltuple = l_dict[row_lkey]

        # # get rtuple, try dictionary first, then dataframe
        row_rkey = row[r_id_pos]
        if row_rkey not in r_dict:
            r_dict[row_rkey] = r_df.loc[row_rkey]
        rtuple = r_dict[row_rkey]

        # # apply the black box function to the tuple pair
        res = black_box_function(ltuple, rtuple)
        # the pair survives blocking unless the black box returns True
        valid.append(res != True)

    return valid
Example #7
    def sarsa_train(self):
        """
        method that implements Sarsa learning algorithm
        """
        iter_per_episode = 20
        print("Algorithm: SARSA")
        print("Number of episodes:", self.episodes)
        print("Number of iterations per episode:", iter_per_episode)
        print("Progress:\n")

        bar = pyprind.ProgBar(self.episodes)
        for episode in range(self.episodes):
            
            self.Q[self.final[:, 0], self.final[:, 1], :, :, :] = 0
            
            # initialize state to arbitrary values
            y = np.random.choice(self.track.shape[0])
            x = np.random.choice(self.track.shape[1])
            v_y = np.random.choice(self.velocities) 
            v_x = np.random.choice(self.velocities)
            
            a = np.argmax(self.Q[y, x, v_y, v_x])
            self.y, self.x, self.v_y, self.v_x = y, x, v_y, v_x

            for _ in range(iter_per_episode):
                if self.track[y, x] == "F" or self.track[y, x] == "#": 
                    break
                    
                # update state
                self.update_state(self.actions[a], self.action_probab) 
                
                # choose the best action for the new state
                a_prime = np.argmax(self.Q[self.y, self.x, self.v_y, self.v_x])
                
                reward = -1
                self.Q[y, x, v_y, v_x, a] = ((1 - self.learning_rate)*self.Q[y, x,v_y, v_x, a] +
                    self.learning_rate*(reward + self.gamma*self.Q[self.y, self.x, self.v_y, self.v_x, a_prime]))
                y, x, v_y, v_x = self.y, self.x, self.v_y, self.v_x
                a = a_prime
            
            # make a simulation of the race
            if episode % 50000 == 0:
                self.make_policy()
                self.simulate()
            bar.update()

        print(bar)
Example #8
    def index_resources(self,
                        resources=None,
                        batch_size=settings.BULK_IMPORT_BATCH_SIZE,
                        quiet=False):
        """
        Indexes a list of resources in bulk to Elastic Search

        Keyword Arguments:
        resources -- the list of resource instances to index
        batch_size -- the number of records to index as a group; the larger the number, the more memory required
        quiet -- Silences the status bar output during certain operations, use in celery operations for example

        Return: None
        """

        start = datetime.now()
        q = Query(se=self.se)
        self.se.refresh(index=self.index_name)
        count_before = self.se.count(index=self.index_name, body=q.dsl)
        result_summary = {"database": len(resources), "indexed": 0}
        if quiet is False:
            bar = pyprind.ProgBar(len(resources),
                                  bar_char="█") if len(resources) > 1 else None
        with self.se.BulkIndexer(batch_size=batch_size,
                                 refresh=True) as indexer:
            for resource in resources:
                if quiet is False and bar is not None:
                    bar.update(item_id=resource)
                tiles = list(
                    models.TileModel.objects.filter(resourceinstance=resource))
                document, doc_id = self.get_documents_to_index(resource, tiles)
                if document is not None and doc_id is not None:
                    indexer.add(index=self.index_name,
                                id=doc_id,
                                data=document)

        self.se.refresh(index=self.index_name)
        result_summary["indexed"] = self.se.count(index=self.index_name,
                                                  body=q.dsl) - count_before
        status = "Passed" if result_summary["database"] == result_summary[
            "indexed"] else "Failed"
        print(
            f"Custom Index - {settings.ELASTICSEARCH_PREFIX}_{self.index_name}"
        )
        print(
            f"    Status: {status}, In Database: {result_summary['database']}, Indexed: {result_summary['indexed']}, Took: {(datetime.now() - start).seconds} seconds"
        )
Example #9
    def run(self):
        """
        Runs the Backtest.
        """
        if self.has_run:
            return

        # set run flag to avoid running same test more than once
        self.has_run = True

        # setup strategy
        self.strategy.setup(self.data)

        # adjust strategy with initial capital
        self.strategy.adjust(self.initial_capital)

        # loop through dates
        # init progress bar
        if self.progress_bar:
            bar = pyprind.ProgBar(len(self.dates),
                                  title='\n' + self.name,
                                  stream=1)

        # since there is a dummy row at time 0, start backtest at date 1.
        # we must still update for t0
        self.strategy.update(self.dates[0])

        # and for the backtest loop, start at date 1
        for dt in self.dates[1:]:
            # update progress bar
            if self.progress_bar:
                bar.update()

            # update strategy
            self.strategy.update(dt)

            if not self.strategy.bankrupt:
                self.strategy.run()
                # need update after to save weights, values and such
                self.strategy.update(dt)
            else:
                if self.progress_bar:
                    bar.stop()

        self.stats = self.strategy.prices.calc_perf_stats()
        self._original_prices = self.strategy.prices
Example #10
def loadDataset(basePath='data'):
    labels = {'pos': 1, 'neg': 0}
    pbar = pyprind.ProgBar(50000)

    rows = []

    for s in ('test', 'train'):
        for l in ('pos', 'neg'):
            path = os.path.join(basePath, s, l)
            for file in os.listdir(path):
                with open(os.path.join(path, file), 'r', encoding='utf-8') as infile:
                    txt = infile.read()
                # accumulate rows in a plain list: DataFrame.append was removed
                # in pandas 2.0, and rebuilding the frame per row is quadratic
                rows.append([txt, labels[l]])
                pbar.update()

    df = pd.DataFrame(rows, columns=['review', 'sentiment'])
    return df
Example #11
def textExtraction():
    pbar = pyprind.ProgBar(50000)
    labels = {"pos": 1, "neg": 0}
    rows = []
    for s in ("test", "train"):
        for l in ("pos", "neg"):
            path = "./%s/%s" % (s, l)
            for file in os.listdir(path):
                with open(os.path.join(path, file), "r",
                          encoding="utf-8") as f:
                    txt = f.read()
                # as in the previous example, collect rows in a list instead
                # of calling the removed DataFrame.append
                rows.append([txt, labels[l]])
                pbar.update()
    data = pd.DataFrame(rows, columns=["review", "sentiment"])
    np.random.seed(0)
    data = data.reindex(np.random.permutation(data.index))
    data.to_csv("./movie_data.csv", index=False)
Example #12
def get_authors_similariy(Edges, GAA, info, train=True):
    """
    Compute authors similarity on the authors co-authorship graph (GAA).
    """
    print('Computing authors similarity...')

    if train:
        t0 = time()
        sim, idx = simrank(GAA, c=0.8, max_iter=100, eps=1e-4)
        s_time = time() - t0
        print("Simrank runtime: %0.3fs" % s_time)
        np.save('../data/GAA_similarity', sim)
    else:
        sim = np.load('../data/GAA_similarity.npy')
        nodes = list(GAA.nodes())
        idx = {node: i for i, node in enumerate(nodes)}

    print('Retrieving citing and cited authors similarities...')
    similarity = []
    bar = pyprind.ProgBar(len(Edges), bar_char='█', width=barwidth)
    for e in Edges[:]:
        ids = int(e[0])
        idt = int(e[1])
        source_info = info.loc[ids]
        source_auth = set(source_info["authors"])
        while 'unknown' in source_auth:
            source_auth.remove('unknown')

        target_info = info.loc[idt]
        target_auth = set(target_info["authors"])
        while 'unknown' in target_auth:
            target_auth.remove('unknown')
        if len(source_auth) and len(target_auth):
            st_sim = [
                sim[idx[u], idx[v]]
                for u, v in itertools.product(source_auth, target_auth)
            ]
            st_sim = max(st_sim)
        else:
            st_sim = 0
        similarity.append(st_sim)
        bar.update()
    return similarity
Example #13
def permutation_test(X_scaled,
                     Y_scaled,
                     X_saliences,
                     Y_saliences,
                     singular_values,
                     inertia,
                     n_perm,
                     verbose=False,
                     algorithm="randomized"):
    n_components = X_saliences.shape[1]
    singular_values_samples = np.zeros((n_components, n_perm))

    if verbose:
        my_perc = pyprind.ProgBar(n_perm,
                                  stream=1,
                                  title='running permutations',
                                  monitor=True)
        #import warnings
        #warnings.filterwarnings("ignore")
    for perm_i in range(n_perm):
        _permute_and_calc_singular_values(X_scaled,
                                          Y_scaled,
                                          X_saliences,
                                          Y_saliences,
                                          singular_values_samples,
                                          perm_i,
                                          n_components,
                                          algorithm=algorithm)
        if verbose:
            my_perc.update()
    if verbose:
        print(my_perc)
        print("calculating p values")

    saliences_p_vals = np.zeros((n_components, ))
    for component_i in range(n_components):
        saliences_p_vals[component_i] = old_div(
            (100.0 - percentileofscore(singular_values_samples[component_i, :],
                                       singular_values[component_i])), 100.0)

    inertia_p_val = old_div(
        (100.0 -
         percentileofscore(singular_values_samples.sum(axis=0), inertia)),
        100.0)

    return saliences_p_vals, inertia_p_val
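
The p-value arithmetic at the end of permutation_test is just the right-tail fraction of the permutation null distribution: (100 - percentileofscore(samples, observed)) / 100 equals, up to tie handling, the share of permuted singular values that exceed the observed one. A quick sketch of that equivalence on random data (illustrative only):

import numpy as np
from scipy.stats import percentileofscore

rng = np.random.default_rng(1)
samples = rng.standard_normal(10000)  # stands in for the permutation null
observed = 1.5

p_percentile = (100.0 - percentileofscore(samples, observed)) / 100.0
p_direct = np.mean(samples > observed)
assert abs(p_percentile - p_direct) < 1e-6  # equal when no sample ties `observed`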
Example #14
def build_graph(Xtrain, Ytrain, info):
    """
	Citations graph (main graph)
	"""
    bar = pyprind.ProgBar(len(Xtrain), bar_char='█', width=barwidth)
    pairs = []
    for i in range(len(Xtrain)):
        if Ytrain.values[i] == 1:
            pairs.append([Xtrain.values[i, 0], Xtrain.values[i, 1]])
        bar.update()
    print('Adding citations edges to the graph')
    G = nx.DiGraph(pairs)
    # Add the remaining nodes:
    G.add_nodes_from(info.index)
    print('Number of nodes:', G.number_of_nodes())
    print('Number of edges:', G.number_of_edges())
    return G
Example #15
def make_table(csv_in, csv_out, last_fm_network):
    df = pd.read_csv(csv_in, sep=',')
    df['Tags'] = pd.Series('', index=df.index)

    progress_bar = None
    if df.shape[0] > 1:
        progress_bar = pyprind.ProgBar(df.shape[0])

    for row in df.index:
        song = LastFMSong(artist=df.at[row, df.columns[0]],
                          title=df.at[row, df.columns[1]],
                          last_fm_network=last_fm_network)
        tags = song.get_tags()
        df.at[row, df.columns[2]] = tags
        if progress_bar:
            progress_bar.update()
    df.to_csv(csv_out)
Example #16
def neflix_features(threshold=10):
    from textblob import TextBlob
    netflix_imdb = cPickle.load(open("Neflix_IMDB.p"))
    bar = pyprind.ProgBar(len(netflix_imdb))

    people_dict = {}
    keywords_dict = {}
    others_dict = {}
    for neflix_id, imdb_id in netflix_imdb.items():
        movie = cPickle.load(gzip.open('/data/imdb/' + imdb_id + ".p", 'rb'))
        people = set()
        director = movie.get('director')
        writer = movie.get('writer')
        producer = movie.get('producer')
        composer = movie.get('composer')
        if movie.get('cast') is not None:
            cast = movie.get('cast')[0:min(5, len(movie.get('cast')))]
        else:
            cast = None
        for fs in [director, cast]:  #writer, producer, composer,
            if fs is not None:
                people.update(fs)
        people_dict[neflix_id - 1] = people

        # keywords
        if "keywords" in movie["keywords"]:
            keywords = [
                k for k in movie["keywords"]["keywords"] if "n-title" not in k
            ]
        else:
            keywords = []
        keywords_dict[neflix_id - 1] = set(keywords)
        # other stuff
        other = set()
        country = movie.get('country', u'')
        genres = movie.get('genres')
        year = ["Year " + str((int(movie.get('year')) / 10) * 10)
                ] if movie.get('year') is not None else []
        for fs in [country, genres, year]:
            if fs is not None:
                other.update(fs)
        others_dict[neflix_id - 1] = other
        bar.update()

    return people_dict, keywords_dict, others_dict
Example #17
def cube_filter_highpass(array,
                         mode,
                         median_size=5,
                         kernel_size=5,
                         fwhm_size=5,
                         btw_cutoff=0.2,
                         btw_order=2):
    """ Wrapper of *frame_filter_highpass* for cubes or 3d arrays.

    Parameters
    ----------
    array : array_like
        Input 3d array.
    mode : {'kernel-conv', 'median-subt', 'gauss-subt', 'fourier-butter'}
        Type of High-pass filtering.
    median_size : int
        Size of the median box for filtering the low-pass median filter.
    kernel_size : 3, 5 or 7
        Size of the Laplacian kernel for convolution. 
    fwhm_size : int
        Size of the Gaussian kernel for the low-pass Gaussian filter.
    btw_cutoff : float
        Frequency cutoff for low-pass 2d Butterworth filter.
    btw_order : int
        Order of low-pass 2d Butterworth filter.
    
    Returns
    -------
    filtered : array_like
        High-pass filtered cube.
    """
    if array.ndim != 3:
        raise TypeError('Input array is not a cube or 3d array')

    n_frames = array.shape[0]
    array_out = np.zeros_like(array)
    msg = 'Applying the High-Pass filter on cube frames'
    bar = pyprind.ProgBar(n_frames, stream=1, title=msg)
    for i in range(n_frames):
        array_out[i] = frame_filter_highpass(array[i], mode, median_size,
                                             kernel_size, fwhm_size,
                                             btw_cutoff, btw_order)
        bar.update()

    return array_out
Example #18
    def _train(self):
        self.model.train()
        loader = self.train_loader
        self.current_epoch += 1

        if self.verbose:
            desc = 'Epoch-' + str(
                self.current_epoch) + ', Current Metric - ' + str(
                    self.current_metric_value)
            # Source inspired by: https://github.com/rasbt/pyprind/blob/master/examples/pyprind_demo.ipynb
            bar = pyprind.ProgBar(len(loader),
                                  track_time=True,
                                  title=desc,
                                  monitor=True,
                                  width=100)
        else:
            bar = None

        for batch_idx, (inputs, targets) in enumerate(deepcopy(loader)):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.loss_function(outputs, targets)
            loss.backward()
            self.optimizer.step()
            if self.verbose:
                if batch_idx % 10 == 0:
                    bar.update(iterations=10)
        if self.verbose:
            bar.stop()
Example #19
    def extract(cls, dirpath, db_name, use_mp=True):
        assert os.path.exists(dirpath)
        if os.path.isdir(dirpath):
            filelist = [
                filename
                for filename in os.listdir(dirpath)
                if os.path.isfile(os.path.join(dirpath, filename))
                and filename.split(".")[-1].lower() == "spold"
            ]
        elif os.path.isfile(dirpath):
            filelist = [dirpath]
        else:
            raise OSError("Can't understand path {}".format(dirpath))

        if sys.version_info < (3, 0):
            use_mp = False

        if use_mp:
            with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
                print("Extracting XML data from {} datasets".format(len(filelist)))
                results = [
                    pool.apply_async(
                        Ecospold2DataExtractor.extract_activity,
                        args=(dirpath, x, db_name),
                    )
                    for x in filelist
                ]
                data = [p.get() for p in results]
        else:
            pbar = pyprind.ProgBar(
                len(filelist), title="Extracting ecospold2 files:", monitor=True
            )

            data = []
            for index, filename in enumerate(filelist):
                data.append(cls.extract_activity(dirpath, filename, db_name))
                pbar.update(item_id=filename[:15])

            print(pbar)

        if sys.version_info < (3, 0):
            print("Converting to unicode")
            return recursive_str_to_unicode(data)
        else:
            return data
Example #20
def retrieve_image_map(img_features,
                       txt_features,
                       class_list_doc2vec,
                       joint_model,
                       joint_model_ext=None,
                       joint_model_weights_ext=None,
                       load_precomputed_embedded_feat=None,
                       verbose=False,
                       progressbar=True):
    def printv(msg):
        if verbose:
            print(msg)

    emb_txts, emb_imgs, img_labels = get_embedded_vectors(
        img_features, txt_features, joint_model, joint_model_ext,
        joint_model_weights_ext, load_precomputed_embedded_feat, verbose)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)

    if progressbar:
        bar = pyprind.ProgBar(len(emb_txts), stream=sys.stdout)

    from sklearn.metrics import average_precision_score

    C = compute_dist_scores(emb_txts, emb_imgs)
    av_prec = []
    for i, dv in enumerate(emb_txts):
        scores = []
        targets = []
        if progressbar:
            bar.update()

        lbl = int(class_list_doc2vec[i])
        for j, im_label in enumerate(img_labels):
            target = not bool(im_label[0] - lbl)
            score = C[i, j]
            scores.append(score)
            targets.append(target)

        AP = average_precision_score(targets, scores)
        av_prec.append(AP)
        printv("Class {} - AP = {}".format(lbl, AP))

    mAP = np.mean(np.asarray(av_prec))
    printv("\t\tmAP = {}".format(mAP))
    return mAP
Example #21
def load_json(json_data, l_block, l_access_w, l_access_r):
    id_block = 0

    if json_data["is_64"] == 1:
        _dynStruct.bits = 64
    else:
        _dynStruct.bits = 32

    try:
        prbar = pyprind.ProgBar(len(json_data["blocks"]), track_time=False, title="Loading Json data")
        for block in filter(None, json_data["blocks"]):
            l_block.append(_dynStruct.Block(block, l_access_w, l_access_r, id_block))
            id_block += 1
            prbar.update()
    except KeyError as e:
        print("Json not from dynamoRIO client, missing : %s" % str(e))
        return False
    return True
Example #22
    def __copy_files__(backup: ParsedBackup, files: list, destination: Path):
        bar = pyprind.ProgBar(len(files))
        for file in files:
            fileID = file[0]

            # Need to investigate this:
            if fileID not in backup.found_files:
                #    print(f"File {fileID} exists in Manifest.db, but not in pybackup.json!")
                continue

            absoluteSource = backup.path / backup.found_files[fileID]['path']
            absoluteDestination: Path = destination / sanitize_filepath(
                file[2])

            os.makedirs(os.path.dirname(absoluteDestination), exist_ok=True)

            shutil.copy(absoluteSource, absoluteDestination)
            bar.update()
Example #23
def read_xlsb(filepath, worksheet, pbar_total=None):
    wb = pyxlsb.open_workbook(str(filepath))
    sheet = wb.get_sheet(worksheet)

    if not pbar_total:
        return [[o.v for o in row] for row in sheet.rows()]
    else:

        def iterate_rows(sheet, bar):
            for row in sheet.rows():
                bar.update()
                yield row

        print("Loading file: ", filepath)
        bar = pyprind.ProgBar(pbar_total)
        data = [[o.v for o in row] for row in iterate_rows(sheet, bar)]
        print(bar)
        return data
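
Wrapping the iterable in a small generator, as iterate_rows does above, is a tidy way to attach a pyprind bar to any lazy iteration without touching the consuming code. A generic version of the same idea (prog_iter is an illustrative name, not part of pyprind):

import pyprind

def prog_iter(iterable, total, **bar_kwargs):
    """Yield items from iterable, advancing a pyprind.ProgBar as a side effect."""
    bar = pyprind.ProgBar(total, **bar_kwargs)
    for item in iterable:
        bar.update()
        yield item

# e.g. data = [[o.v for o in row] for row in prog_iter(sheet.rows(), pbar_total)]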
Example #24
def check(text, label, f):
    pbar = pyprind.ProgBar(len(text))
    stopword = stopwords.words('english')
    cntcor = 0
    cnttot = 0
    for i in range(len(text)):
        lines = text[i].strip()
        lines = re.sub(r'[\W]+', ' ', lines.lower())
        tmp = [word for word in lines.split() if word not in stopword]
        ans, ann = knn(tmp, f, K=5)
        if (ann == 1) & (label[i] == 1):
            cntcor += 1
        if (ann == -1) & (label[i] == 0):
            cntcor += 1
        cnttot += 1
        pbar.update()
    print("\n\n\n final correct rate:")
    print(str(cntcor * 1.0 / cnttot))
Example #25
def precision(df, tree, n=100):
    """
    :param df:
    :param tree:
    :param n:
    :return:
    """
    import pyprind
    bar = pyprind.ProgBar(n, track_time=True, stream=1)
    p = 0
    for i in range(n):
        obs = df.sample(n=1).values[0]
        t = obs[-1]
        obs = obs[:-1]
        clf = classify(obs, tree)
        if t in clf: p += 1
        bar.update()
    return float(p) / float(n)
Example #26
def generate_prediction(classifier, testing_file_content,
                        testing_image_feature_dict, prediction_file_prefix):
    """Generate prediction.
    
    :param classifier: the classifier
    :type classifier: object
    :param testing_file_content: the content in the testing file
    :type testing_file_content: numpy array
    :param testing_image_feature_dict: the features of the testing images which is saved in a dict
    :type testing_image_feature_dict: dict
    :param prediction_file_prefix: the prefix of the prediction file
    :type prediction_file_prefix: string
    :return: the prediction file will be saved to disk
    :rtype: None
    """

    print("\nGenerating prediction ...")

    # Add progress bar
    progress_bar = pyprind.ProgBar(testing_file_content.shape[0], monitor=True)

    # Generate prediction
    prediction_list = []
    for _, file_1_name, file_2_name in testing_file_content:
        file_1_feature = testing_image_feature_dict[file_1_name]
        file_2_feature = testing_image_feature_dict[file_2_name]
        final_feature = get_final_feature(file_1_feature, file_2_feature)
        final_feature = final_feature.reshape(1, -1)

        probability_estimates = classifier.predict_proba(final_feature)
        prediction = probability_estimates[0, 1]
        prediction_list.append(prediction)

        # Update progress bar
        progress_bar.update()

    # Report tracking information
    print(progress_bar)

    # Write prediction
    prediction_file_name = prediction_file_prefix + str(int(
        time.time())) + ".csv"
    write_prediction(testing_file_content, np.array(prediction_list),
                     prediction_file_name)
Example #27
def get_common_resolution(all_res_df):

    print("Obtaining common resolution per video session.")

    all_common_res_df = pd.DataFrame(columns=["session_id", "common_res"])

    session_ids_array = []
    common_res_array = []

    all_res_sessions = all_res_df["session_id"]
    all_res_values = all_res_df["resolution_mc"]

    compact_all_res_df = pd.DataFrame(columns=["session_id", "resolution_mc"])
    compact_all_res_df["session_id"] = all_res_sessions
    compact_all_res_df["resolution_mc"] = all_res_values

    all_video_sessions = all_res_df.session_id.unique()

    print "Unique Video Sessions: " + str(len(all_video_sessions))
    bar = pyprind.ProgBar(len(all_video_sessions),
                          monitor=True,
                          title="Get Common Resolution")

    # cont = 0
    for video_session in all_video_sessions:
        query_str = "session_id == '" + video_session + "'"
        tmp_df = compact_all_res_df.query(query_str)
        if tmp_df.loc[:, "resolution_mc"].mode().size > 0:
            common_res = tmp_df.loc[:, "resolution_mc"].mode()[0]
            session_ids_array.append(video_session)
            common_res_array.append(common_res)
        else:
            common_res = tmp_df.iloc[0]["resolution_mc"]
            session_ids_array.append(video_session)
            common_res_array.append(common_res)
        # cont += 1
        # print "Processed "+str(cont)+"/"+str(len(all_video_sessions))+" video sessions."
        bar.update()

    print(bar)
    all_common_res_df["session_id"] = session_ids_array
    all_common_res_df["common_res"] = common_res_array

    return all_common_res_df
Example #28
def train(args, env, agent, opt, update=train_update, verbose=True):
    train_rewards = []
    iter_reward = []
    train_start = time()
    train_steps = 0
    num_updates = 0
    while train_steps < args.n_steps and not agent.done():
        state = env.reset()
        episode_reward = 0.0
        hidden_state = sample_lstm_state(args)
        for path in range(args.max_path_length):
            while agent.updatable():
                env._update()
                update(args, env, agent, opt)
                num_updates += 1

            if train_steps % args.print_interval == 0:
                progbar = pyprind.ProgBar(args.print_interval) 
                denom = max(1, len(iter_reward))
                train_rewards.append(sum(iter_reward) / denom)
                if verbose and train_steps > 0:
                    n_iter = train_steps // args.print_interval
                    timing = time() - train_start
                    print_stats('Train', iter_reward, n_iter, timing,
                                train_steps, num_updates, agent)
                iter_reward = []

            action, action_info = agent.forward(state, hidden_state)
            hidden_state = action_info.returns[0]
            if args.render:
                env.render()
            next_state, reward, done, _ = env.step(action)
            agent.learn(
                state, action, reward, next_state, done, info=action_info)
            train_steps += 1
            progbar.update()
            episode_reward += reward

            if done or agent.done():
                break
            state = next_state
        agent.new_episode(done)
        iter_reward.append(episode_reward) 
    return train_rewards
Example #29
def Sort_Smooth(Energy, States, pb=False):
    ''' Sort states to remove false avoided crossings.

    This is a function to ensure that all eigenstates plotted change
    adiabatically. It does this by assuming that from step to step the
    eigenstates should vary by only a small amount (i.e. that the step size
    is fine) and arranging states to maximise the overlap from one step to
    the next.

    Args:
        Energy (numpy.ndarray) : array containing the eigenenergies, as from numpy.linalg.eig
        States (numpy.ndarray) : array containing the states, in the same order as Energy
        pb (bool) : optionally show a progress bar; requires pyprind. Doesn't work in all environments (sorry!)
    Returns:
        Energy (numpy.ndarray) : array containing the eigenenergies, as from numpy.linalg.eig
        States (numpy.ndarray) : array containing the states, in the same order as Energy, E[x,i] -> States[x,:,i]
    '''
    ls = numpy.arange(States.shape[2], dtype="int")
    number_iterations = len(Energy[:, 0])
    if pb:
        bar = pyprind.ProgBar(number_iterations, monitor=True)
    for i in range(1, number_iterations):
        '''
        This loop sorts the eigenstates such that they maintain some
        continuity. Each eigenstate should be chosen to maximise the overlap
        with the previous.
        '''
        #calculate the overlap of the ith and jth eigenstates
        overlaps = numpy.einsum('ij,ik->jk',
                                numpy.conjugate(States[i - 1, :, :]),
                                States[i, :, :])
        orig2 = States[i, :, :].copy()
        orig1 = Energy[i, :].copy()
        #insert location of maximums into array ls
        numpy.argmax(numpy.abs(overlaps), axis=1, out=ls)
        for k in range(States.shape[2]):
            l = ls[k]
            if l != k:
                Energy[i, k] = orig1[l].copy()
                States[i, :, k] = orig2[:, l].copy()
        if pb:
            bar.update()
    if pb:
        print(bar)
    return Energy, States
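
The numpy.einsum('ij,ik->jk', ...) call above builds the full overlap matrix between consecutive sets of eigenvectors in one shot: with states stored as columns, it is exactly conj(A).T @ B, whose (j, k) entry is the overlap of previous state j with current state k. A quick sketch verifying that equivalence on random data:

import numpy

rng = numpy.random.default_rng(0)
A = rng.standard_normal((4, 3)) + 1j * rng.standard_normal((4, 3))  # previous states as columns
B = rng.standard_normal((4, 3)) + 1j * rng.standard_normal((4, 3))  # current states as columns

overlaps = numpy.einsum('ij,ik->jk', numpy.conjugate(A), B)
assert numpy.allclose(overlaps, numpy.conjugate(A).T @ B)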
Example #30
    def read_ds18(spread_sheet_name):
        """
        Method that is used to take the DS18
        spreadsheet and turn the spreadsheet into products
        :param spread_sheet_name: Name of the spreadsheet
        :return: array of objects
        """

        # TODO: Need to test the method
        # TODO: Make the method more versatile
        print('Reading DS18 Data Sheet')
        df = pd.read_excel(spread_sheet_name, "product_sheets")
        df_needed = pd.read_excel(spread_sheet_name, "for_store")
        ds18_products = []

        # ----------------------------
        ds18_needed = {}
        for i in df_needed.index:
            # Go through and get all of the models we need
            ds18_needed[(str(df_needed['Model'][i]).replace("-", ""))] = str(
                df_needed['Collection'][i])

        # print(ds18_needed)

        # ----------------------------

        total = 1
        all_total = 1
        bar = pyprind.ProgBar(len(df.index), monitor=True)
        for i in df.index:
            ds18_product = None
            if df['MSRP'][i] != 0 and (df['Model'][i].replace(
                    "-", "")) in ds18_needed:
                ds18_product = DS18Variants(
                    str(df['Model'][i]), str(df['Brand'][i]),
                    str(df['Name'][i]), str(df['M/C'][i]), str(df['MSRP'][i]),
                    str(df['Dealer'][i]),
                    str(ds18_needed[df['Model'][i].replace("-", "")]))
            if ds18_product is not None:
                ds18_products.append(ds18_product)
            total += 1
            all_total += 1
            bar.update()
        return ds18_products