def main():
    p = Pool(8)
    args = parse_arguments()
    sdf_dir = args.sdf_dir
    list_of_sdf_files = list(read_sdf_file(sdf_dir))
    print(list_of_sdf_files)
    proc = Conformer_generator(args)
    t = TicToc()
    t.tic()

    for file in list_of_sdf_files:
        sdf_file_name = file.split('_')[0]
        print(sdf_file_name)
        full_directory_path = os.path.join(args.conformers_file_dir,
                                           sdf_file_name)
        os.makedirs(full_directory_path, exist_ok=True)
        os.chdir(full_directory_path)
        try:
            molecule_object_from_sdf_file = MoleculeReader(
                os.path.join(sdf_dir, file))
            list_of_molecules = [m for m in molecule_object_from_sdf_file]

            p.map(proc.generate_conformer, list_of_molecules)
        except Exception:
            print("cannot read sdf file {}".format(file))

    t.toc()
    print(t.elapsed)
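
All the examples on this page share the same stopwatch pattern. A minimal sketch, assuming the ttictoc-style TicToc these snippets use (tic() starts the clock, toc() stops it and fills .elapsed):

from ttictoc import TicToc

t = TicToc()
t.tic()                    # start timing
sum(range(1_000_000))      # stand-in workload
t.toc()                    # stop timing and store the result in t.elapsed
print(t.elapsed)           # elapsed wall-clock seconds as a float
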
Example #2
    def update(self):

        #Update Stuff
        if not self.plan:
            x0 = drone_pos.pose.position.x
            y0 = drone_pos.pose.position.y
            _, _, yaw0 = euler_from_quaternion(
                (drone_pos.pose.orientation.x, drone_pos.pose.orientation.y,
                 drone_pos.pose.orientation.z, drone_pos.pose.orientation.w))
            t = TicToc()
            t.tic()
            path = self.proxy(x0, y0, yaw0, self.route[0]).plannedPath
            t.toc()
            print(t.elapsed)

            if len(path.poses) > 0:
                rospy.loginfo("pp success")
                self.first_pose = False
                self.plan = True
                print('We have sent the position!')
                return pt.common.Status.SUCCESS
            else:
                rospy.loginfo("pp failure")
                return pt.common.Status.FAILURE

        else:
            #rospy.loginfo("pp running")
            return pt.common.Status.SUCCESS
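
The yaw extraction above, reduced to a standalone sketch (assumes ROS's tf.transformations; the identity quaternion is used as placeholder input):

from tf.transformations import euler_from_quaternion

qx, qy, qz, qw = 0.0, 0.0, 0.0, 1.0              # identity quaternion
roll, pitch, yaw = euler_from_quaternion((qx, qy, qz, qw))
print(yaw)                                       # 0.0 radians
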
Example #3
    def start(self):
        """
        start the job

        the job includes the following things:

            * fetch new unverified revisions
            * score this revisions
            * filter all suspected bad revisions
            * insert revisions to table
        """

        t = TicToc()
        t.tic()

        sql_user_name = read_sql_user_name()
        wikishield_conn = WS(sql_user_name)
        wikishield_conn.start()
        lm = LangsManager()
        for lang_name in lm.langs_names:
            lang = lm.get_lang(lang_name)
            clf = WikiClassifier(lang, wikishield_conn.ctx)
            clf.learn(limit=None)
            file_path = WikiClassifier.PICKLE_FOLDER_PATH + '/' + lang_name + '.pickle'
            clf.pickle_to_file(file_path)

        wikishield_conn.close()

        t.toc()
        print("learn job summary: elapsed time = ", t.elapsed,
              "seconds")  #TODO: remove this
def main():
    p = multiprocessing.Pool(8)

    args = parse_arguments()
    sdf_dir = args.sdf_dir
    list_of_sdf_files = list(read_sdf_file(sdf_dir))
    #print((list_of_sdf_files))

    t = TicToc()
    t.tic()
    if list_of_sdf_files:
        p.map(test_func, list_of_sdf_files)

    # Sequential alternative kept for reference:
    # molecule_object_from_sdf_file = MoleculeReader(os.path.join(sdf_dir, file))
    # list_of_molecules = [m for m in molecule_object_from_sdf_file]
    # for mol in molecule_object_from_sdf_file:
    #     conformers = generate_confs(mol, int(args.number_of_conformers), 8)
    #     with MoleculeWriter('%s_conformers.mol2' % mol.identifier) as mol_writer:
    #         for c in conformers:
    #             mol_writer.write(c.molecule)

    t.toc()
    print(t.elapsed)
def opt_sat(sat_files, hh_sat, num_run):

    # uf_li=list(pathlib.Path(dir_name).glob("**/*.cnf"))

    p, pattern = os.path.split(sat_files)
    uf_li = [os.path.join(p, x) for x in os.listdir(p) if re.fullmatch(pattern, x)]
    num_instance = len(uf_li)

    uf_mutation = [[0] * num_run for _ in range(num_instance)]
    uf_mutation_to_max_goal = [[0] * num_run for _ in range(num_instance)]
    uf_goal = [[0] * num_run for _ in range(num_instance)]
    uf_runtime = [[0] * num_run for _ in range(num_instance)]

    uf_num = [[0] * num_instance for _ in range(2)]
    uf_global_ind = [list() for _ in range(2)]
    uf_mutation_li = [[list() for _ in range(num_instance)] for _ in range(2)]
    uf_goal_li = [[list() for _ in range(num_instance)] for _ in range(2)]
    uf_runtime_li = [[list() for _ in range(num_instance)] for _ in range(2)]

    t = TicToc()

    for instance_id in range(num_instance):
        logging.info("======NEXT INSTANCE:{}======".format(instance_id))

        sat = Sat(str(uf_li[instance_id]))
        sat.print_inner_var()

        for run_id in range(num_run):

            logging.info("------NEXT RUN:{}------".format(run_id))
            t.tic()
            sat.reset_solution()
            hh_sat.reset_benchmark(sat)
            hh_sat.optimize()
            hh_sat.stat()
            t.toc()

            uf_mutation[instance_id][run_id] = hh_sat.num_mutate
            uf_mutation_to_max_goal[instance_id][run_id] = hh_sat.num_mutate_to_max_goal
            uf_goal[instance_id][run_id] = hh_sat.max_goal
            uf_runtime[instance_id][run_id] = t.elapsed

            # found the global optimum within max_mutate
            if hh_sat.max_goal == sat.num_cla:
                uf_num[0][instance_id] += 1
                uf_global_ind[0].append((instance_id, run_id))
                uf_mutation_li[0][instance_id].append(hh_sat.num_mutate)
                uf_goal_li[0][instance_id].append(hh_sat.max_goal)
                uf_runtime_li[0][instance_id].append(t.elapsed)
            # did not find the global optimum within max_mutate
            else:
                uf_num[1][instance_id] += 1
                uf_global_ind[1].append((instance_id, run_id))
                uf_mutation_li[1][instance_id].append(hh_sat.num_mutate)
                uf_goal_li[1][instance_id].append(hh_sat.max_goal)
                uf_runtime_li[1][instance_id].append(t.elapsed)

    return uf_li, (uf_mutation, uf_goal, uf_runtime, uf_mutation_to_max_goal), (uf_num, uf_global_ind, uf_mutation_li, uf_goal_li, uf_runtime_li)
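
A note on the nested-list initialisation above: the comprehensions avoid the classic list-multiplication aliasing pitfall, sketched here with the standard library only:

rows_aliased = [[0] * 3] * 2           # both rows are the SAME inner list
rows_aliased[0][0] = 1
print(rows_aliased)                    # [[1, 0, 0], [1, 0, 0]] - both changed

rows_ok = [[0] * 3 for _ in range(2)]  # fresh inner list per row, as in opt_sat
rows_ok[0][0] = 1
print(rows_ok)                         # [[1, 0, 0], [0, 0, 0]]
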
Example #6
def task1(file_path):
    data = pd.read_csv(
        file_path,
        usecols=(
            0,  # age
            1,  # job
            15,  # p outcome
            4,  # balance
            5,  # default
            18,  # y - classifier
            7,  # loan - classifier
        ))
    timer = TicToc()

    # Data clean-up
    data = data_transform(data)
    print_table(data.head())

    X_data = data.drop(columns="y")
    Y_data = data["y"]
    A_data = data.drop(columns="loan")
    B_data = data["loan"]

    # Training/testing sets
    X_train = X_data[:-250]
    X_test = X_data[-250:]
    Y_train = Y_data[:-250]
    Y_test = Y_data[-250:]

    A_train = A_data[:-250]
    A_test = A_data[-250:]
    B_train = B_data[:-250]
    B_test = B_data[-250:]

    # Apply linear regression model
    # regr = linear_model.LinearRegression()
    timer.tic()
    regr = svm.SVC(gamma=0.5, C=100)
    regr.fit(X_train, Y_train)
    prediction_x = regr.predict(X_test)
    timer.toc()
    print(
        f"Y-classified: {evaluate_result(prediction_x, Y_test.values)}% matched in {timer.elapsed}s"
    )

    timer.tic()
    svc = svm.SVC(gamma=0.5, C=100)
    svc.fit(A_train, B_train)
    prediction_a = svc.predict(A_test)
    timer.toc()
    print(
        f"Loan-classified: {evaluate_result(prediction_a, B_test.values)}% matched in {timer.elapsed}s"
    )

    plt.show()
Example #7
def task6(file_path):
    data = pd.read_csv(
        file_path,
        usecols=(
            0,  # age
            1,  # job
            2,  # marital
            3,  # education
            7,  # loan
            18,  # y - classifier
        ))
    labels = data.columns.values
    timer = TicToc()

    # Data clean-up
    data = data_transform(data)

    print_table(data.head())

    X_data = data.drop(columns="y")
    Y_data = data["y"]

    # Training/testing sets
    X_train = X_data[:-10000]
    X_test = X_data[-10000:]
    Y_train = Y_data[:-10000]
    Y_test = Y_data[-10000:]

    A_train = X_data[:-10]
    A_test = X_data[-10:]
    B_train = Y_data[:-10]
    B_test = Y_data[-10:]

    dec_tree = DecisionTreeClassifier()
    dec_tree_underfit = DecisionTreeClassifier()

    timer.tic()
    dec_tree.fit(X_train, Y_train)
    prediction_dec = dec_tree.predict(X_test)
    timer.toc()
    print(
        f"DecisionTreeClassifier: {evaluate_result(prediction_dec, Y_test.values)}% matched in {timer.elapsed}s"
    )

    timer.tic()
    dec_tree_underfit.fit(A_train, B_train)
    prediction_underfit = dec_tree_underfit.predict(A_test)
    timer.toc()
    print(
        f"DecisionTreeClassifier-Underfit: {evaluate_result(prediction_underfit, B_test.values)}% matched in {timer.elapsed}s"
    )
Example #8
def task2(file_path):
    data = pd.read_csv(
        file_path,
        usecols=(
            0,  # age
            2,  # marital
        ))
    labels = data.columns.values
    colors = ["r.", "g.", "b."]
    timer = TicToc()

    # Data clean-up
    data = data_transform(data)
    print_table(data.head())

    classifier = KMeans(n_clusters=3)
    classifier.fit(data)
    center = classifier.cluster_centers_
    kmeans_labels = classifier.labels_

    timer.tic()
    print(f"INFO: Rendering {len(data)} data points - ")
    print("INFO: This may take a while...")
    for index in range(len(data)):
        if index > 0 and index % 1000 == 0:
            print(f"INFO: Completed {index} iterations")
        plt.plot(
            data[labels[0]][index],
            data[labels[1]][index],
            colors[kmeans_labels[index]],
            markersize=10,
        )
    timer.toc()
    print(
        f"Render time for all data points: {timer.elapsed}s ({seconds_to_minutes(timer.elapsed)}min)"
    )

    plt.scatter(
        center[:, 0],
        center[:, 1],
        marker="o",
        s=150,
    )
    plt.show()
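
For reference, the per-point loop above can usually be collapsed into one vectorised call; a sketch reusing task2's variables (c= maps cluster ids to colours via the default colormap, instead of the manual "r."/"g."/"b." codes):

plt.scatter(data[labels[0]], data[labels[1]], c=kmeans_labels, s=10)
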
Example #9
    def test_maze(self):
        size = 20
        random.seed(1)
        m = make_maze(size, size)
        w = len(m.split('\n')[0])
        h = len(m.split('\n'))

        start = (1, 1)  # we choose to start at the upper left corner
        goal = (w - 2, h - 2)  # we want to reach the lower right corner

        t = TicToc()
        t.tic()
        _path = list(MazeSolver(m).astar(start, goal))
        t.toc()
        # print("Github", t.elapsed)

        t.tic()
        path, length = astar(MazeProblem(m, start, goal))
        t.toc()
Example #10
def task7(file_path):
    data = pd.read_csv(
        file_path,
        usecols=(
            5,  # balance
            7,  # loan
            16,  # bank_arg1
            18,  # y - classifier
        ))
    labels = data.columns.values
    timer = TicToc()

    # Data clean-up
    data = data_transform(data)
    print_table(data.head())

    X_data = data.drop(columns="y")
    Y_data = data["y"]

    X_train = X_data[:-1]
    X_test = X_data[-1:]
    Y_train = Y_data[:-1]
    Y_test = Y_data[-1:]

    dec_tree = DecisionTreeClassifier()
    random_forest = RandomForestClassifier()

    timer.tic()
    dec_tree.fit(X_train, Y_train)
    prediction_dec = dec_tree.predict(X_test)
    timer.toc()
    print(
        f"DecisionTreeClassifier: {evaluate_result(prediction_dec, Y_test.values)}% matched in {timer.elapsed}s"
    )

    timer.tic()
    random_forest.fit(X_train, Y_train)
    prediction_for = random_forest.predict(X_test)
    timer.toc()
    print(
        f"RandomForestClassifier: {evaluate_result(prediction_for, Y_test.values)}% matched in {timer.elapsed}s"
    )
Example #11
def _run_exec(sandbox_directory, output, problem):
    t = TicToc()
    status = Status.NOT_RUN

    time_limit, mem_limit, _, _, _ = _read_config_file(problem)
    script_location = sandbox_directory + 'exec_script.sh'
    exec_location = sandbox_directory + output.split('/')[-1]

    print('script = {}\nexec = {}'.format(script_location, exec_location))

    t.tic()
    process = Popen([script_location, exec_location], stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()
    elapsed = t.toc()

    if os.path.exists('./{}.out'.format(problem)):
        shutil.copy('./{}.out'.format(problem), sandbox_directory)
    else:
        status = Status.WRONG_ANSWER

    return_code = int(re.findall(r'\d+', stdout.decode('utf-8'))[0])
    print('return_code = {}'.format(return_code))

    if return_code == int(Status.SIGINT):
        status = Status.TLE
    elif return_code == int(Status.SIGSEGV):
        status = Status.SIGSEGV
        print("SEG FAULT")
    elif return_code == int(Status.SIGFPE):
        status = Status.SIGFPE
    elif return_code == int(Status.SIGABRT):
        status = Status.SIGABRT

    if status == Status.NOT_RUN and elapsed > time_limit:
        status = Status.TLE

    # Test for MLE

    if status == Status.NOT_RUN and return_code == 0:
        status = Status.OK

    return status, round(elapsed, 3), stdout
    def start(self):
        """
        start the job

        the job includes the following things:

            * fetch new unverified revisions
            * score these revisions
            * filter all suspected bad revisions
            * insert revisions to table
        """

        t = TicToc()
        t.tic()

        local_conn, wiki_conn = conn_mng.open_connections(self.lang)
        wiki_classifier = reload_classifier(self.lang.name)

        wikimedia_db, wikishield_db, wikimedia_api = conn_mng.init_sources(wiki_conn.ctx, local_conn.ctx, self.lang)
        
        max_rev_id = None

        revs, _ = wikimedia_db.fetch_natural_revs(self.lang, self._NUM_REVS, max_rev_id,
                                                  self._EX_PART_SIZE, self._MIN_PART_SIZE)

        for rev in revs:
            diff_text, page_title = wikimedia_api.fetch_rev_diff(rev['wiki_id'], rev['parent_id'])
            rev['page_title'] = page_title
            print(rev)
            words_content = extract_added_words(self.lang, diff_text)
            if len(words_content) > 0:
                score = wiki_classifier.score_rev(words_content)
                rev['score'] = bad_score = score[0]
                if bad_score >= self._MIN_BAD_SCORE:
                    wikishield_db.insert_rev(rev, diff_text, words_content)
        wikishield_db.commit()

        conn_mng.close_connections(local_conn, wiki_conn)

        t.toc()
        print("add_revs_job: elapsed time = ", t.elapsed, "seconds") #TODO: remove this
def BlobRemoveThread(
    blobRemoveQueue: queue.SimpleQueue,
    credentials: str,
    bucketName: str,
    isTrialRun: bool,
    logLevel: str,
    threadId: int
) -> None:
    # Set logging level and get logger
    setLogingLevel(logLevel)
    logger = logging.getLogger(__name__)

    # Get GCS bucket
    credentials = Credentials.from_service_account_info(json.loads(credentials))
    client = storage.Client(project=credentials.project_id, credentials=credentials)
    bucket = client.get_bucket(bucketName)

    # Create stats variables
    ticToc = TicToc()

    while True:
        # Get task
        task = blobRemoveQueue.get()
        if task is None:
            break

        # Extract task
        name: str = task[0]

        ticToc.tic()
        if not isTrialRun:
            try:
                bucket.delete_blob(name)
            except Exception:
                pass  # ignore blobs that are already gone

        logger.info('× {}, elapsed: {:.3f}s, thread: {}'.format(
            name,
            ticToc.toc(),
            threadId
        ))
Example #14
def main():
    """Used to test the Archipelago class.

    Tests 3 random seeds between 0 and 65535 with weathering values of 1, 3, 5 and sea_level values between -20 and 32
    in steps of 4.
    """
    t = TicToc()
    t.tic()
    args = []
    for seed in random.sample(range(0xFFFF), 3):
        for weathering in [1, 3, 5]:
            for sea_level in range(-20, 32, 4):
                sea_level = sea_level / 100  # range() can't be used to generate a list of floats
                args.append([seed, weathering, sea_level])
    pool = Pool(multiprocessing.cpu_count())
    print("Total archipelagos being generated:", len(args))
    pool.starmap(test, args)
    pool.close()
    pool.join()
    t.toc()
    print("Total time elapsed (seconds): {0:.2f}".format(t.elapsed))
def train_model(classifier,
                feature_vector_train,
                label,
                feature_vector_valid,
                is_neural_net=False):
    t = TicToc()  ## TicToc("name")
    t.tic()
    # fit the training dataset on the classifier
    classifier.fit(feature_vector_train, label)

    # predict the labels on validation dataset
    predictions = classifier.predict(feature_vector_valid)

    if is_neural_net:
        predictions = predictions.argmax(axis=-1)
    t.toc()
    print("Time elapsed to train: {}".format(t.elapsed))
    # return metrics.f1_score(predictions, valid_y) * 100
    # valid_y (the validation labels) is expected to be defined at module level
    tn, fp, fn, tp = metrics.confusion_matrix(predictions, valid_y).ravel()
    print("Confusion matrix results:")
    print("True neg: {}\nFalse pos: {}\nFalse neg: {}\nTrue pos:{}".format(
        tn, fp, fn, tp))
    print()
    return metrics.f1_score(predictions, valid_y) * 100
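
A hypothetical call for train_model (every name below is a placeholder, and valid_y must exist at module level because the function reads it directly):

from sklearn.naive_bayes import MultinomialNB

valid_y = y_valid_labels   # placeholder: validation labels read by train_model
f1 = train_model(MultinomialNB(), X_train_vectors, y_train_labels, X_valid_vectors)
print("F1 score:", f1)
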
Example #16
def task3(file_path):
    binning3 = preprocessing.KBinsDiscretizer(n_bins=3)
    binning6 = preprocessing.KBinsDiscretizer(n_bins=6)
    binning9 = preprocessing.KBinsDiscretizer(n_bins=9)

    data = pd.read_csv(
        file_path,
        usecols=(
            7,  # loan - classifier
            16,  # bank_arg1
            18,  # y - classifier
        ))
    labels = data.columns.values
    timer = TicToc()

    # Data clean-up
    data = data_transform(data)

    print_table(data.head())

    X_data = data.drop(columns="bank_arg1")
    Y_data = data["bank_arg1"]

    # Training/testing sets
    X_train = X_data[:-2500]
    X_test = X_data[-2500:]
    Y_train = Y_data[:-2500]
    Y_test = Y_data[-2500:]

    timer.tic()
    binning3.fit(X_data)
    prediction3 = binning3.transform(X_data)
    timer.toc()
    elapsed3 = timer.elapsed

    timer.tic()
    binning6.fit(X_data)
    prediction6 = binning6.transform(X_data)
    timer.toc()
    elapsed6 = timer.elapsed

    timer.tic()
    binning9.fit(X_data)
    prediction9 = binning9.transform(X_data)
    timer.toc()
    elapsed9 = timer.elapsed

    # TODO: Fix evaluation for matrix
    acc3 = evaluate_result(prediction3, Y_data.values)
    acc6 = evaluate_result(prediction6, Y_data.values)
    acc9 = evaluate_result(prediction9, Y_data.values)

    print(f"Binning with 3 units: {acc3}% matched in {timer.elapsed}s")
    print(f"Binning with 6 units: {acc6}% matched in {timer.elapsed}s")
    print(f"Binning with 9 units: {acc9}% matched in {timer.elapsed}s")
Example #17
def listCommand(databaseFileName: str, prefix: str, logLevel: str, sortBy: str,
                reverseSort: bool, machineReadable: bool) -> None:
    def getKey(fileInfo: FileInfo):
        if sortBy == 'perm':
            return fileInfo.stats['mode']
        if sortBy == 'blobs':
            return len(fileInfo.blobIds)
        if sortBy == 'uid':
            return fileInfo.stats['uid']
        if sortBy == 'gid':
            return fileInfo.stats['gid']
        if sortBy == 'decrypt':
            return fileInfo.decryptedSize
        if sortBy == 'encrypt':
            return fileInfo.encryptedSize
        if sortBy == 'modification':
            return fileInfo.stats['mtime']
        if sortBy == 'path':
            return fileInfo.path
        return 0

    # Set process title
    setproctitle.setproctitle('ListCommand')

    # Set logging level and get logger
    setLogingLevel(logLevel)
    logger = logging.getLogger(__name__)

    # Print parameters
    logger.info(
        'database: {}, prefix: {}, sortBy: {}, reverseSort: {}, machineReadable: {}'
        .format(databaseFileName, prefix, sortBy, reverseSort,
                machineReadable))

    # Create stats variables
    ticToc = TicToc()
    ticToc.tic()

    # Get transient database
    database = Database.getTransientCopy(databaseFileName)

    # Get absolute path
    prefix = os.path.abspath(prefix)

    # Get file infos
    fileInfos = [
        database.getFile(path) for path in database.selectPaths(prefix)
    ]

    # Close database
    database.close()

    # Sort files based on key
    fileInfos.sort(key=getKey, reverse=reverseSort)

    # Print in different format for machines
    if machineReadable:
        for fileInfo in fileInfos:
            print('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
                fileInfo.stats['mode'], len(fileInfo.blobIds),
                fileInfo.stats['uid'], fileInfo.stats['gid'],
                fileInfo.decryptedSize, fileInfo.encryptedSize,
                fileInfo.stats['atime'], fileInfo.stats['mtime'],
                fileInfo.path))
    else:
        # Print a header for humans
        print('{:8} {:5} {:5} {:5} {:10}  {:10}  {:26} {}'.format(
            'Perm', 'Blobs', 'uid', 'gid', 'Decrypt', 'Encrypt',
            'Modification', 'Path'))
        for fileInfo in fileInfos:
            print('{:8} {:<5} {:<5} {:<5} {:>10}  {:>10}  {:26} {}'.format(
                oct(fileInfo.stats['mode']), len(fileInfo.blobIds),
                fileInfo.stats['uid'], fileInfo.stats['gid'],
                naturalsize(fileInfo.decryptedSize),
                naturalsize(fileInfo.encryptedSize),
                datetime.fromtimestamp(fileInfo.stats['mtime']).strftime('%c'),
                fileInfo.path))

    # Print elapsed time
    logger.info('elapsed: {:.3f}s'.format(ticToc.toc()))
Example #18
def generate_preprocessed_file():
    savingPath = get_preprocessed_file_path()
    if not Path(savingPath).is_file():
        timer = TicToc('preprocessing time')
        timer.tic()
        print('Generating preprocessed dataset file started ..........')
        ds_path = "../dataset/Mobile_App_Success_Milestone_2.csv"  # Training
        if 'xlsx' in ds_path:
            dataset = pd.read_excel(ds_path, parse_dates=['Last Updated'])
        else:
            dataset = pd.read_csv(ds_path,
                                  parse_dates=['Last Updated'],
                                  low_memory=False)
        dataset.dropna(how='any', inplace=True)

        dataset['App_Rating'] = dataset['App_Rating'].str.replace(
            'Low_Rating', '1')
        dataset['App_Rating'] = dataset['App_Rating'].str.replace(
            'Intermediate_Rating', '2')
        dataset['App_Rating'] = dataset['App_Rating'].str.replace(
            'High_Rating', '3')
        dataset['App_Rating'] = pd.to_numeric(dataset['App_Rating'],
                                              downcast='integer',
                                              errors='coerce')
        dataset['Last Updated'] = pd.to_datetime(dataset['Last Updated'],
                                                 errors='coerce')
        most_freq_date = dataset['Last Updated'].value_counts().idxmax()
        dataset['Last Updated'] = dataset['Last Updated'].fillna(
            most_freq_date)
        dataset['Price'] = dataset['Price'].str.replace('$', '', regex=False)
        dataset['Installs'] = dataset['Installs'].str.replace('+', '', regex=False)
        dataset['Installs'] = dataset['Installs'].str.replace(',', '', regex=False)

        dataset['Reviews'] = pd.to_numeric(dataset['Reviews'],
                                           downcast='integer',
                                           errors='coerce')
        dataset['Price'] = pd.to_numeric(dataset['Price'],
                                         downcast='float',
                                         errors='coerce')
        dataset['Installs'] = pd.to_numeric(dataset['Installs'],
                                            downcast='integer',
                                            errors='coerce')
        dataset = dataset.fillna(0)
        dataset["Size"] = dataset["Size"].str.replace(
            "Varies with device",
            str(len(dataset[dataset['Size'] == "Varies with device"])))
        dataset["Size"] = dataset["Size"].str.replace(",", "")
        dataset["Size"] = dataset["Size"].str.replace("+", "")
        dataset["Size"] = (dataset["Size"].replace(r'[kM]+$', '', regex=True).astype(float) *\
                dataset["Size"].str.extract(r'[\d\.]+([kM]+)', expand=False).fillna(1).replace(['k','M'], [10**3, 10**6]).astype(int))
        dataset = dataset.fillna(0)
        contentRatings = dataset['Content Rating'].unique()
        for Uval in contentRatings:
            meanVal = len(dataset[dataset['Content Rating'] == Uval])
            dataset['Content Rating'] = dataset['Content Rating'].str.replace(
                Uval, str(meanVal))

        Categories = dataset['Category'].unique()
        for Uval in Categories:
            repeatingTimes = len(dataset[dataset['Category'] == Uval])
            dataset['Category'] = dataset['Category'].str.replace(
                Uval, str(repeatingTimes))

#        for i in range(len(dataset)) :
#            dataset.iloc[ i , 7] = datetime.timestamp(dataset.iloc[ i , 7]) # Last Update
        dataset['Content Rating'] = pd.to_numeric(dataset['Content Rating'],
                                                  downcast='float',
                                                  errors='coerce')
        dataset['Category'] = pd.to_numeric(dataset['Category'],
                                            downcast='integer',
                                            errors='coerce')
        cols = ('App Name', 'Last Updated', 'Minimum Version',
                'Latest Version')
        dataset = Feature_Encoder(dataset, cols)
        cols = dataset.columns
        for c in cols:
            dataset[c] = dataset[c].fillna(dataset[c].value_counts().idxmax())
        dataset.to_csv(path_or_buf=savingPath, index=False)
        print('Preprocessed File Generated Successfully.......')
        timer.toc()
        print('Preprocessing Time : ' + str(round(timer.elapsed / 60, 5)) +
              ' Minutes')
        return savingPath
    else:
        return get_preprocessed_file_path()
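
The Size conversion above is dense; a worked sketch of the same k/M-suffix logic on a toy series (pandas only):

import pandas as pd

s = pd.Series(['512k', '1.5M', '20M'])
value = s.replace(r'[kM]+$', '', regex=True).astype(float)
scale = (s.str.extract(r'[\d\.]+([kM]+)', expand=False)
          .fillna(1).replace(['k', 'M'], [10**3, 10**6]).astype(int))
print(value * scale)   # 512000.0, 1500000.0, 20000000.0
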
Example #19
def ChunkEncryptionProcess(chunkEncryptionQueue: multiprocessing.SimpleQueue,
                           blobInfoQueue: multiprocessing.SimpleQueue,
                           credentials: str, bucketName: str, processId: int,
                           nUploadThreads: int, uploadQueueMiB: int) -> None:
    def readChunk(path: str,
                  offset: int,
                  length: int = 1024 * 1024 * 32) -> bytes:
        with open(path, 'rb') as fp:
            assert offset == fp.seek(offset)
            return fp.read(length)

    def encryptChunk(chunk: bytes) -> list:
        # Choose a random name
        name = ''.join(random.choice('0123456789abcdef') for i in range(32))

        # Get decrypted stats
        decryptedSize = len(chunk)
        decryptedSha256 = hashlib.sha256(chunk).digest()

        # Generate key and encrypt chunk
        # Discard decrypted chunk at the same time
        encryptionKey = Fernet.generate_key()
        chunk = Fernet(encryptionKey).encrypt(chunk)

        # Get encrypted stats
        encryptedSize = len(chunk)
        encryptedSha256 = hashlib.sha256(chunk).digest()

        # Generate blob info
        blobInfo = BlobInfo(name, encryptionKey, encryptedSize,
                            encryptedSha256, decryptedSize, decryptedSha256)

        return [chunk, blobInfo]

    # Start blob upload threads
    blobUploadQueueBytes = ThreadValueLock(1024 * 1024 * uploadQueueMiB)
    blobUploadQueue = queue.SimpleQueue()
    blobUploadThreads = []
    for threadId in range(nUploadThreads):
        thread = Thread(target=BlobUploadThread,
                        args=[
                            blobUploadQueue, blobUploadQueueBytes, credentials,
                            bucketName, threadId
                        ],
                        name='BlobUploadThread{}'.format(threadId))
        thread.start()
        blobUploadThreads.append(thread)

    # Create stats variable
    ticToc = TicToc()

    # Process tasks until received None
    while True:
        # Set process title
        setproctitle.setproctitle('ChunkEncryptionProcess{}'.format(processId))

        # Get task
        task = chunkEncryptionQueue.get()
        if task is None:
            break

        # Start measuring time used for encryption
        elapsed = []
        ticToc.tic()

        # Extract task
        path: str = task[0]
        offset: int = task[1]
        fileSize: int = task[2]

        # Update process title
        setproctitle.setproctitle(
            'ChunkEncryptionProcess{} {}, chunk {}/{}'.format(
                processId, path, offset // (1024 * 1024 * 32),
                math.ceil(fileSize / (1024 * 1024 * 32))))

        # Read chunk
        chunk = readChunk(path, offset)

        # Encrypt chunk
        # Discard decrypted chunk at the same time
        result: list = encryptChunk(chunk)
        chunk: bytes = result[0]
        blobInfo: BlobInfo = result[1]

        # Stop measuring time used for encryption
        elapsed.append(ticToc.toc())

        # Start measuring time used waiting for upload queue
        ticToc.tic()

        # Send encrypted chunk to blob upload threads
        blobUploadQueueBytes.acquire(len(chunk))
        blobUploadQueue.put([blobInfo.name, chunk])

        # Stop measuring time used waiting for upload queue
        elapsed.append(ticToc.toc())

        # Send blob info to blob info collection process
        blobInfoQueue.put([
            path, offset, blobInfo, elapsed,
            blobUploadQueue.qsize(),
            blobUploadQueueBytes.getValue()
        ])

    # Stop blob upload threads
    for _ in range(nUploadThreads):
        blobUploadQueue.put(None)
    for thread in blobUploadThreads:
        thread.join()
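
The blob lifecycle above boils down to a Fernet round trip; a minimal sketch using the cryptography package's Fernet, as in encryptChunk:

from cryptography.fernet import Fernet

key = Fernet.generate_key()                  # per-blob key, stored in BlobInfo
token = Fernet(key).encrypt(b'chunk bytes')  # ciphertext that gets uploaded
assert Fernet(key).decrypt(token) == b'chunk bytes'
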
Example #20
class SVMLR_FrankWolfe(object):

    def __init__(self,
                 nb_labels,
                 nb_instances,
                 DEBUG=False,
                 DEBUG_SOLVER=False,
                 is_shared_H_memory=False,
                 SOLVER_LP='cvxopt',
                 startup_idx_save_disk=None,
                 process_dot_Hxt=1):
        self._logger = create_logger("__SVMLR_FrankWolfe", DEBUG)
        self.nb_labels = nb_labels
        self.nb_instances = nb_instances
        self.nb_preferences = int(self.nb_labels * (self.nb_labels - 1) * 0.5)
        self.d_size = self.nb_preferences * self.nb_instances
        self._t = TicToc("__SVMLR_FrankWolfe")
        self._t.set_print_toc(False)
        solvers.options["show_progress"] = DEBUG_SOLVER
        self._trace_convergence = []
        self.is_shared_H_memory = is_shared_H_memory
        self.DEBUG = DEBUG
        # variables to create matrix H in parallel and shared memory and disk
        self.name_matrix_H = "sparse_H_" + str(int(time.time()))
        self.in_temp_path = "/tmp/"
        self.startup_idx_save_disk = int(self.nb_preferences * 0.5 + 1) \
            if startup_idx_save_disk is None \
            else startup_idx_save_disk
        self.SOLVER_LP_DEFAULT = SOLVER_LP
        self.process_dot_Hxt = process_dot_Hxt
        self.pool = None
        if self.process_dot_Hxt > 0:
            self.pool = multiprocessing.Pool(processes=self.process_dot_Hxt)

    def get_alpha(self, A, q, v, max_iter=100, tol=1e-8):
        # 0. calculate the large matrix des
        # it is shared memory, we use the H global variable to speed to multiplication bigger matrix
        H = self.calculate_H(q, A)
        # self._logger.debug("Is it semi-definite positive matrix (%s)", is_symmetric(H))

        # 1. Set the constraints for the dual problem
        e_i = self.nb_preferences
        max_limit = float(v / e_i)

        # 2. Call wrapper linear programing solver
        lp_programing = self.__wrapper_lp_solvers(0, max_limit, solver=self.SOLVER_LP_DEFAULT)

        # 3. Frank-Wolfe algorithm
        x_t = np.zeros(self.d_size)  # init value for algorithm frank-wolfe
        c = np.repeat(-1.0, self.d_size)
        g_t, it = 0, 0

        for it in range(max_iter):
            # Step 0: compute gradient of the sparse matrix
            grad_fx = self.compute_H_dot_x_grad(x_t, H, c)

            # Step 1: direction-finding sub-problem
            s_t = lp_programing(grad_fx)
            d_t = s_t - x_t
            g_t = -1 * (grad_fx.dot(d_t))
            # verify if gap is below tolerance
            if g_t <= tol:
                break
            # Step 2: set step size by line search
            Hd_t = self.compute_H_dot_x_grad(d_t, H)
            z_t = d_t.dot(Hd_t)
            step_size = min(-1 * (c.dot(d_t) + x_t.dot(Hd_t)) / z_t, 1.)
            # Step 3: update current value
            x_t = x_t + step_size * d_t
            if self.DEBUG:
                self._trace_convergence.append(g_t)
                self._logger.debug("Gradient-cost-iteration (it, grad) (%s, %s)", it, g_t)

        self._logger.debug("Cost-Fx-gradient and #iters (grad_fx, iters, is_optimal) (%s, %s, %s)",
                           g_t, it, it + 1 < max_iter)
        self._logger.debug("Vector solution alphas (%s)", x_t)
        return x_t

    # async def dot_xt_Hr_preference(self, x_t, H):
    #     async def one_iteration(r):
    #         return H[r] @ x_t + H[r].T @ x_t
    #
    #     coros_or_futures = [one_iteration(r) for r in range(0, self.startup_idx_save_disk - 1)]
    #     res = await asyncio.gather(*coros_or_futures)
    #     return np.sum(res, axis=0)

    def compute_H_dot_x_grad(self, x_t, H, add_vec=None):
        # Gradient evaluate in current value x_t
        grad_fx = np.zeros(self.d_size)
        if self.is_shared_H_memory:  # multiprocessing inner product space

            def dot_xt_Hr_preference(H, x_t, r):
                self._logger.debug("Dot-inner-product preference (H_%s @ x_t)", r)
                return H @ x_t + H.T @ x_t

            __thread_dot = [None] * (self.startup_idx_save_disk - 1)
            for i in range(self.startup_idx_save_disk - 1):
                __thread_dot[i] = ThreadWithReturnValue(target=dot_xt_Hr_preference, args=(H[i], x_t, i,))
                __thread_dot[i].start()

            if self.process_dot_Hxt > 1:

                # (1) asynchronous at testing
                # import asyncio
                # loop = asyncio.new_event_loop()
                # inner_product = loop.run_until_complete(self.dot_xt_Hr_preference(x_t, H))
                # grad_fx = grad_fx + inner_product
                # loop.close()

                # (2) multiprocessing dot calculation
                # It's not possible because it does not share memory very well (until python-3.8)
                # fnc_target = partial(dot_xt_Hr_preference, x_t)
                # res = self.pool.map(fnc_target, range(0, self.startup_idx_save_disk - 1))
                # for x_memory in res:
                #     grad_fx = grad_fx + x_memory

                func_target = partial(dot_xt_Hr_from_disk_hard, x_t, self.name_matrix_H, self.in_temp_path)
                res = self.pool.map(func_target, range(self.startup_idx_save_disk - 1, self.nb_preferences))
                for x_r in res:
                    grad_fx = grad_fx + x_r

            else:  # singleton processing
                # for r in range(0, self.startup_idx_save_disk - 1):
                #     grad_fx = grad_fx + H[r] @ x_t + H[r].T @ x_t

                for r in range(self.startup_idx_save_disk - 1, self.nb_preferences):
                    H_disk = load_npz(self.in_temp_path + self.name_matrix_H + "_" + str(r + 1) + ".npz")
                    x_disk = H_disk @ x_t + H_disk.T @ x_t
                    grad_fx = grad_fx + x_disk

            for _dot in __thread_dot:
                grad_fx = grad_fx + _dot.join()
        else:
            grad_fx = H @ x_t + H.T @ x_t

        if add_vec is not None:
            grad_fx = grad_fx + add_vec

        return grad_fx

    def calculate_H(self, q, A):
        if self.is_shared_H_memory:
            return sparse_matrix_H_shared_memory_and_disk(q, A,
                                                          self.nb_preferences,
                                                          self.nb_instances,
                                                          self.name_matrix_H,
                                                          self.startup_idx_save_disk,
                                                          self.in_temp_path)
        else:
            return self.all_sparse_symmetric_H(q, A)

    def all_sparse_symmetric_H(self, q, A):
        self._logger.debug('Size H-matrix (nb_preference, nb_instances, d_size) (%s, %s, %s)',
                           self.nb_preferences, self.nb_instances, self.nb_preferences * self.nb_instances)

        data_coo = None
        for r in range(0, self.nb_preferences):
            rows, cols, data = array.array('i'), array.array('i'), array.array('d')

            def append(i, j, d):
                rows.append(i)
                cols.append(j)
                data.append(d)

            for l in range(r, self.nb_preferences):
                self._t.tic()
                for i in range(0, self.nb_instances):
                    _i = i if r == l else 0
                    for j in range(_i, self.nb_instances):
                        list_pq = q[i][r]
                        list_ab = q[j][l]
                        # creation index (row, column)
                        i_row = self.nb_instances * r + i
                        i_col = self.nb_instances * l + j
                        cell_data = A[i, j]
                        # put half value to diagonal matrix to use H + H.T
                        if i_row == i_col and r == l:
                            cell_data = 0.5 * cell_data

                        if list_pq[0] == list_ab[0]:
                            append(i_row, i_col, cell_data)

                        elif list_pq[0] == list_ab[1]:
                            append(i_row, i_col, -1 * cell_data)

                        elif list_pq[1] == list_ab[0]:
                            append(i_row, i_col, -1 * cell_data)

                        elif list_pq[1] == list_ab[1]:
                            append(i_row, i_col, cell_data)

                self._logger.debug('Time pair-wise preference label (%s, %s, %s)',
                                   'P' + str(r + 1), 'P' + str(l + 1), self._t.toc())
            if data_coo is not None:
                data.extend(data_coo.data)
                rows.extend(data_coo.row)
                cols.extend(data_coo.col)
            rows = np.frombuffer(rows, dtype=np.int32)
            cols = np.frombuffer(cols, dtype=np.int32)
            data = np.frombuffer(data, dtype='d')
            data_coo = coo_matrix((data, (rows, cols)), shape=(self.d_size, self.d_size))

        return data_coo.tocsr()

    def __wrapper_lp_solvers(self, lower_bound, upper_bound, solver='cvxopt'):
        self._logger.debug("Linear solver selected (%s)", solver)
        if solver == 'cvxopt':
            def __executing(grad_fx):
                res = solvers.lp(matrix(grad_fx, (self.d_size, 1)), G=G, h=h)
                if res['status'] != 'optimal':
                    self._logger.info("[Solution-not-Optimal-Not-convergence] status (%s)", res['status'])
                return np.array([v for v in res["x"]])

            # 1. Create bound constraint for linear programming
            x_bound_upper = spmatrix(1.0, range(self.d_size), range(self.d_size))
            x_bound_lower = spmatrix(-1.0, range(self.d_size), range(self.d_size))
            G = sparse([x_bound_upper, x_bound_lower])
            h = matrix(np.hstack([np.repeat(upper_bound, self.d_size), -np.zeros(self.d_size)]))
            return __executing
        elif solver == 'scipy':
            def __executing(grad_fx):
                res = linprog(grad_fx, bounds=(lower_bound, upper_bound))
                if res['status'] != 0:
                    self._logger.info("[Solution-not-Optimal-Not-convergence] status (%s)", res['status'])
                return np.array([v for v in res["x"]])

            return __executing
        elif solver == 'salmuz':
            def __lp_with_box_constraint(c):
                lp_solution_optimal = np.zeros(c.shape)
                idx_negative_value = np.where(c < 0)[0]
                if len(idx_negative_value) == 0:
                    return lp_solution_optimal
                lp_solution_optimal[idx_negative_value] = upper_bound
                return lp_solution_optimal

            return __lp_with_box_constraint
        else:
            raise Exception('Solver not implemented yet')

    def plot_convergence(self):
        plt.plot(self._trace_convergence, lw=1)
        plt.yscale('log')
        plt.xlabel('Number of iterations')
        plt.ylabel('Relative Frank-Wolfe gap')
        plt.title('Convergence QP')
        plt.grid()
        plt.show()

    def __del__(self):
        # remove temporal files where it save the sparse matrix
        if self.is_shared_H_memory:
            for r in range(self.startup_idx_save_disk - 1, self.nb_preferences):
                os.remove(self.in_temp_path + self.name_matrix_H + "_" + str(r + 1) + ".npz")
        # release pool process after computations
        if self.pool is not None:
            self.pool.close()
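
Stripped of the shared-memory machinery, the Frank-Wolfe loop in get_alpha reduces to the following self-contained sketch for f(x) = 0.5·x'Hx + c'x over the box [0, u]^d (illustrative names, symmetric H assumed):

import numpy as np

def frank_wolfe_box(H, c, u, max_iter=100, tol=1e-8):
    x = np.zeros(len(c))
    for _ in range(max_iter):
        grad = H @ x + c                    # gradient of 0.5 x'Hx + c'x
        s = np.where(grad < 0, u, 0.0)      # LP over the box picks a vertex
        d = s - x
        if -grad.dot(d) <= tol:             # Frank-Wolfe gap below tolerance
            break
        Hd = H @ d
        step = min(-(c.dot(d) + x.dot(Hd)) / d.dot(Hd), 1.0)  # exact line search
        x = x + step * d
    return x
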
Example #21
def task4(file_path):
    data = pd.read_csv(
        file_path,
        usecols=(
            0,  # age
            1,  # job
            2,  # marital
            3,  # education
            7,  # loan
            18,  # y - classifier
        ))
    labels = data.columns.values
    timer = TicToc()

    # Data clean-up
    data = data_transform(data)

    print_table(data.head())

    X_data = data.drop(columns="y")
    Y_data = data["y"]

    # Training/testing sets
    X_train = X_data[:-10000]
    X_test = X_data[-10000:]
    Y_train = Y_data[:-10000]
    Y_test = Y_data[-10000:]

    kneighbors = KNeighborsClassifier()
    dec_tree = DecisionTreeClassifier()
    gauss = GaussianNB()
    svc = svm.SVC()
    random_forest = RandomForestClassifier()

    timer.tic()
    kneighbors.fit(X_train, Y_train)
    prediction_kn = kneighbors.predict(X_test)
    timer.toc()
    print(
        f"KNeighborsClassifier: {evaluate_result(prediction_kn, Y_test.values)}% matched in {timer.elapsed}s"
    )

    timer.tic()
    dec_tree.fit(X_train, Y_train)
    prediction_dec = dec_tree.predict(X_test)
    timer.toc()
    print(
        f"DecisionTreeClassifier: {evaluate_result(prediction_dec, Y_test.values)}% matched in {timer.elapsed}s"
    )

    timer.tic()
    gauss.fit(X_train, Y_train)
    prediction_g = gauss.predict(X_test)
    timer.toc()
    print(
        f"GaussianNB: {evaluate_result(prediction_g, Y_test.values)}% matched in {timer.elapsed}s"
    )

    timer.tic()
    svc.fit(X_train, Y_train)
    prediction_svc = svc.predict(X_test)
    timer.toc()
    print(
        f"svm.SVC: {evaluate_result(prediction_svc, Y_test.values)}% matched in {timer.elapsed}s"
    )

    timer.tic()
    random_forest.fit(X_train, Y_train)
    prediction_for = random_forest.predict(X_test)
    timer.toc()
    print(
        f"RandomForestClassifier: {evaluate_result(prediction_for, Y_test.values)}% matched in {timer.elapsed}s"
    )
Example #22
def BlobInfoCollectionProcess(blobInfoQueue: multiprocessing.SimpleQueue,
                              databaseFileName: str, logLevel: str,
                              nEncryptionWorkers: int,
                              nFileHashingWorkers: int) -> None:
    def sendToDatabase(database: Database, logger: logging.Logger, path: str,
                       record: list, nEncryptionWorkers: int) -> int:
        # Sort blob info according to offset
        blobInfoPairs: list = record[2]
        blobInfoPairs.sort(key=lambda blobInfoPair: blobInfoPair[0])
        blobInfos = [blobInfoPair[1] for blobInfoPair in blobInfoPairs]

        # Create stats variables
        decryptedSize = 0
        encryptedSize = 0

        # Insert blob info into database
        blobIds = []
        for blobInfo in blobInfos:
            blobId = database.setBlob(blobInfo)
            blobIds.append(blobId)

            decryptedSize += blobInfo.decryptedSize
            encryptedSize += blobInfo.encryptedSize
        assert decryptedSize == record[0]

        # Wait and get hash of file
        filehashingResult: multiprocessing.pool.AsyncResult = record[4]
        filehashingResult.wait()
        sha256 = filehashingResult.get()

        # Get stat of file
        osStat = os.stat(path)
        stats = {
            'mode': osStat.st_mode,
            'uid': osStat.st_uid,
            'gid': osStat.st_gid,
            'atime': osStat.st_atime,
            'mtime': osStat.st_mtime,
        }

        # Insert file info into database
        fileInfo = FileInfo(path, sha256, stats, encryptedSize, decryptedSize,
                            blobIds)
        database.setFile(fileInfo)

        # Print info
        elapsed = record[3]
        logger.info(
            '+ {}, before: {}, after: {}, blobs: {}, elapsed: {:.3f}s, speed: {}/s {}/s'
            .format(path, naturalsize(decryptedSize),
                    naturalsize(encryptedSize), len(blobIds), elapsed,
                    naturalsize(decryptedSize / elapsed * nEncryptionWorkers),
                    naturalsize(encryptedSize / elapsed * nEncryptionWorkers)))

        return encryptedSize

    # Set process title
    setproctitle.setproctitle('BlobInfoCollectionProcess')

    # Set logging level and get logger
    setLogingLevel(logLevel)
    logger = logging.getLogger(__name__)

    # Open database
    database = Database(databaseFileName)

    # Create record pool
    recordPool = dict()

    # Create file hashing pool
    fileHashingPool = Pool(processes=nFileHashingWorkers)

    # Create stats variables
    decryptedSize = 0
    encryptedSize = 0
    ticToc = TicToc()

    # Process tasks until received None
    ticToc.tic()
    while True:
        # Get task
        task = blobInfoQueue.get()
        if task is None:
            break

        # Extract task
        path: str = task[0]
        offset: int = task[1]
        blobInfo: BlobInfo = task[2]
        elapsed: list = task[3]
        uploadQueueSize: int = task[4]
        uploadQueueBytes: int = task[5]

        # Print info
        logger.info(
            '+ {}, before: {}, after: {}, elapsed: {:.3f}s {:.3f}s, speed: {}/s {}/s, queue: {} {}'
            .format(blobInfo.name, naturalsize(blobInfo.decryptedSize),
                    naturalsize(blobInfo.encryptedSize),
                    elapsed[0], elapsed[1],
                    naturalsize(blobInfo.decryptedSize / elapsed[0]),
                    naturalsize(blobInfo.encryptedSize / elapsed[0]),
                    uploadQueueSize, naturalsize(uploadQueueBytes)))

        # Get file record from pool
        if path in recordPool:
            record = recordPool[path]
        # Create a new record if there is no record
        else:
            # Start hashing the file
            filehashingResult = fileHashingPool.apply_async(
                func=FileHashingPoolFunction, args=[path])

            # Create new record
            record = [
                os.stat(path).st_size,  # File size
                0,  # Total bytes processed
                [],  # BlobInfo[]
                0,  # Total elapsed time processing the file
                filehashingResult  # Async hashing result
            ]

            # Insert new record
            recordPool[path] = record

        # Update record
        record[1] += blobInfo.decryptedSize
        record[2].append([offset, blobInfo])
        record[3] += elapsed[0] + elapsed[1]

        # If file is completely processed, send file record to database
        if record[0] == record[1]:
            # Update process title
            setproctitle.setproctitle(
                'BlobInfoCollectionProcess {}'.format(path))

            decryptedSize += record[0]
            encryptedSize += sendToDatabase(database, logger, path, record,
                                            nEncryptionWorkers)

            # Remove file record from pool
            recordPool.pop(path)

            # Reset process title
            setproctitle.setproctitle('BlobInfoCollectionProcess')

    # Stop timer
    elapsed = ticToc.toc()

    # Close file hashing pool
    fileHashingPool.close()
    fileHashingPool.join()

    # Close database
    database.commit()
    database.close()

    # Print info
    logger.info('elapsed: {:.3f}s, avg speed: {}/s {}/s'.format(
        elapsed, naturalsize(decryptedSize / elapsed),
        naturalsize(encryptedSize / elapsed)))
Example #23
class SVMLR_QP(object):
    def __init__(self,
                 nb_labels,
                 nb_instances,
                 DEBUG=False,
                 DEBUG_SOLVER=False):
        self._logger = create_logger("__SVMLR_QP", DEBUG)
        self.nb_labels = nb_labels
        self.nb_instances = nb_instances
        self._t = TicToc("__SVMLR_QP")
        self._t.set_print_toc(False)
        solvers.options["show_progress"] = DEBUG_SOLVER

    @timeit
    def get_alpha(self, A, q, v):
        """
        :return: list of alpha, size k(k-1)/2
        """
        # 1. Calculate matrix H
        # h = self.old_calculate_H(q, data)
        h_numpy = self.calculate_H(q, A)
        # np.set_printoptions(linewidth=125)
        # print(np.array(matrix(h_numpy)))
        # np.savetxt("mat_qd.txt", np.array(matrix(h_numpy)), fmt='%0.5f')

        # 2.Set the constraints for the dual problem
        e_i = int(0.5 * self.nb_labels * (self.nb_labels - 1))
        max_limit = float(v / e_i)
        size_H = int(0.5 * self.nb_labels * (self.nb_labels - 1) *
                     self.nb_instances)
        res = self.min_convex_qp(
            h_numpy,
            np.repeat(-1.0, size_H),
            np.repeat(0.0, size_H),
            np.repeat(max_limit, size_H),
            size_H,
        )

        solution = np.array([v for v in res["x"]])

        if res['status'] != 'optimal':
            self._logger.info(
                "[Solution-not-Optimal-Not-convergence] v_default (%s)", v)

        return solution

    @timeit
    def calculate_H(self, q, A):
        """
        :param A: numpy array
        :param q: list Q
        :return: Matrix H
        """
        row, col, data = [], [], []
        nb_preferences = int(self.nb_labels * (self.nb_labels - 1) * 0.5)

        self._logger.debug('Size H-matrix (%s, %s, %s)', nb_preferences,
                           self.nb_instances,
                           nb_preferences * self.nb_instances)
        for r in range(0, nb_preferences):
            for l in range(r, nb_preferences):
                self._t.tic()
                for j in range(0, self.nb_instances):
                    _j = j if r == l else 0
                    for i in range(_j, self.nb_instances):
                        list_pq = q[i][r]
                        list_ab = q[j][l]
                        # creation index (row, column)
                        i_row = self.nb_instances * r + i
                        i_col = self.nb_instances * l + j
                        cell_data = A[i, j]

                        if list_pq[0] == list_ab[0]:
                            if i_col == i_row:
                                row.append(i_row)
                                col.append(i_col)
                                data.append(cell_data)
                            else:
                                row.extend((i_row, i_col))
                                col.extend((i_col, i_row))
                                data.extend((cell_data, cell_data))

                        elif list_pq[0] == list_ab[1]:
                            if i_col == i_row:
                                row.append(i_row)
                                col.append(i_col)
                                data.append(-1 * cell_data)
                            else:
                                row.extend((i_row, i_col))
                                col.extend((i_col, i_row))
                                data.extend((-1 * cell_data, -1 * cell_data))

                        elif list_pq[1] == list_ab[0]:
                            if i_col == i_row:
                                row.append(i_row)
                                col.append(i_col)
                                data.append(-1 * cell_data)
                            else:
                                row.extend((i_row, i_col))
                                col.extend((i_col, i_row))
                                data.extend((-1 * cell_data, -1 * cell_data))

                        elif list_pq[1] == list_ab[1]:
                            if i_col == i_row:
                                row.append(i_row)
                                col.append(i_col)
                                data.append(cell_data)
                            else:
                                row.extend((i_row, i_col))
                                col.extend((i_col, i_row))
                                data.extend((cell_data, cell_data))
                self._logger.debug(
                    'Time pair-wise preference label (%s, %s, %s)',
                    'P' + str(r + 1), 'P' + str(l + 1), self._t.toc())

        size_H = int(nb_preferences * self.nb_instances)
        mat_h = spmatrix(data, row, col, size=(size_H, size_H))
        # self._logger.debug("Full matrix(mat_a)\n %s", mat_a)
        # for verification with old version
        # np.savetxt("mat_h.txt", matrix(mat_h), fmt='%0.3f')

        return mat_h

    def min_convex_qp(self, H, q, lower, upper, d):
        ell_lower = matrix(lower, (d, 1))
        ell_upper = matrix(upper, (d, 1))
        q = matrix(q, (d, 1))
        I = matrix(0.0, (d, d))
        I[::d + 1] = 1
        G = matrix([I, -I])
        h = matrix([ell_upper, -ell_lower])
        # solvers.options["refinement"] = 2
        solvers.options["kktreg"] = 1e-9
        # https://groups.google.com/forum/#!msg/cvxopt/Umcrj8UD20g/iGY4z5YgDAAJ
        return solvers.qp(P=H,
                          q=q,
                          G=G,
                          h=h,
                          kktsolver="ldl",
                          options=solvers.options)

    def plot_convergence(self):
        raise Exception("Not implemented yet")
class TagFinder():
    def __init__(self):
        self.in_between_wait = .05
        self.driver = self.setup_selenium_driver()
        self.taggees = {}
        self.successful_retrievals = 0
        self.first_url = ""
        self.MAX_RUN_TIME = 360
        self.timer = TicToc()

    def setup_selenium_driver(self):
        usr = os.getenv('fbusername')
        pwd = os.getenv('fbpassword')
        PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
        DRIVER_BIN = os.path.join(PROJECT_ROOT, "chromedriver")

        driver = webdriver.Chrome(executable_path=DRIVER_BIN)
        # driver = webdriver.Firefox(
        #     executable_path='/Users/mattross/webdriver/gecko/v0.26.0/geckodriver-v0.26.0-macos/geckodriver')

        # driver = webdriver.Chrome()
        driver.get('https://www.facebook.com/')
        print("Opened facebook")
        sleep(.1)

        username_box = driver.find_element_by_id('email')
        username_box.send_keys(usr)
        print("Email Id entered")
        sleep(.21)

        password_box = driver.find_element_by_id('pass')
        password_box.send_keys(pwd)
        print("Password entered")

        login_box = driver.find_element_by_id('loginbutton')
        login_box.click()

        sleep(.1)

        return driver

    def get_name_of_tagger(self, driver):
        while True:
            try:
                # taggee_class is assumed to be defined earlier in the module
                tagger = driver.find_element_by_class_name(taggee_class).text
                if tagger is not None:

                    try:
                        tag_date = driver.find_element_by_class_name(
                            "_39g5").text
                        if not self.taggees[tagger]["last_tagged"]:
                            self.taggees[tagger]["last_tagged"] = tag_date
                        self.taggees[tagger]["first_tagged"] = tag_date
                    except Exception:
                        # the date element is not present on every photo
                        pass

                    # Manage current speed of timer
                    if self.successful_retrievals < 6:
                        self.successful_retrievals += 1
                    else:
                        # Retrieved 6 photos in a row successfully,
                        # so reset the sleep timer
                        self.successful_retrievals = 0
                        self.in_between_wait = .05

                    if driver.current_url != self.first_url:
                        return tagger
                    else:
                        self.stop_iteration_and_display_data()
            except Exception:
                # Page could not load fast enough; back off, skip ahead
                # and try again
                self.in_between_wait += .1
                sleep(self.in_between_wait)
                driver.find_element_by_css_selector('body').send_keys(
                    Keys.ARROW_RIGHT)

    def find_all_tagged_photos(self):

        driver = self.driver
        profile_url = "https://www.facebook.com/Ross.Ross.1080/photos"
        profile_url = "https://www.facebook.com/steviedunbardude/photos"
        profile_url = "https://www.facebook.com/mary.notari"
        if "/photos" not in profile_url:
            profile_url += "/photos"
        driver.get(profile_url)

        sleep(.5)
        driver.find_element_by_css_selector('body').click()
        tags = driver.find_elements_by_class_name('fbPhotoStarGridElement')
        sleep(1)
        tags[0].click()

        sleep(1)
        print("starting iteration ")

        self.timer.tic()
        for i in range(2130):  # hard upper bound on photos to visit

            tagger = self.get_name_of_tagger(driver)

            if not self.first_url:
                self.first_url = driver.current_url
            if tagger not in self.taggees.keys():
                self.taggees[tagger] = {
                    "tag_count": 1,
                    "first_tagged": "",
                    "last_tagged": ""
                }
                print("You have been tagged by {}, count is at {}".format(
                    tagger, len(self.taggees.keys())))
            else:
                self.taggees[tagger]["tag_count"] += 1
            driver.find_element_by_css_selector('body').send_keys(
                Keys.ARROW_RIGHT)
            sleep(self.in_between_wait)

            if self.timer.toc() > self.MAX_RUN_TIME:
                self.stop_iteration_and_display_data()

        self.stop_iteration_and_display_data()

    def stop_iteration_and_display_data(self):
        print("Process took {}s".format(self.timer.toc()))
        self.driver.quit()

        print(
            "Done iterating through FB photos, displaying counts now.....\n\n\n\n"
        )
        sleep(1)
        with open('output.txt', 'w') as writer:
            for tagger in self.taggees.keys():

                tag_message = "You have been tagged by {}, {} times \n".format(
                    tagger, self.taggees[tagger]["tag_count"])
                print(tag_message)
                writer.write(tag_message)
                print("The first time you were tagged by them was {}".format(
                    self.taggees[tagger]["first_tagged"]))
                print("The last time you were tagged by them was {}".format(
                    self.taggees[tagger]["last_tagged"]))
                print(
                    "-----------------------------------------------------------------------------"
                )
        exit()
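# --- Usage sketch (illustrative, not from the source) -----------------
# TagFinder logs in to Facebook when constructed; find_all_tagged_photos()
# then walks the tagged photos and writes per-tagger counts to output.txt.
#
#     if __name__ == '__main__':
#         finder = TagFinder()
#         finder.find_all_tagged_photos()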
Exemple #25
def mandalorian():
    global UpKey, DownKey, LeftKey, RightKey, PauseKey, QuitKey, Bullet, sheildkey
    tik = TicToc()
    tik.tic()
    STEP = 0.0001
    me = person(50, 9)         # player sprite
    bsp = boss()               # boss sprite
    score = 0
    lives = 5
    co = []                    # coins
    ob = []                    # obstacles
    mag = []                   # magnets
    dra = []                   # dragon power-ups on screen
    drag = []                  # dragon body segments
    bu = []                    # player bullets
    sp = []                    # speed power-ups
    shstart = 0                # shield activation time
    spstart = 0                # speed-boost activation time
    dragonp = 0                # dragon active flag
    sheildp = 0                # shield active flag
    sheildav = 1               # shield available flag
    bossm = 0                  # boss mode flag
    bb = []                    # boss bullets
    speed = 1
    speedp = 0                 # speed boost flag
    time = 60
    bossh = 100                # boss health
    t = 0                      # frame counter
    bt = 2000                  # frame at which boss mode starts
    
    while True:
        tty.setcbreak(sys.stdin.fileno())

        t+=1
        sleep(0.1)
        system('clear')
        # key event
        key_pressed()
        if UpKey:
            UpKey=False
            me.upy(-0.2)
            # if dragonp:                     #on for dragon movement
            #     me.dr-=0.02
        if RightKey:
            RightKey=False
            me.upx(0.1)
        if LeftKey:
            LeftKey=False
            me.upx(-0.1) 
        if Bullet:
            Bullet=False
            bu.append(spr(me.getx()+2,me.gety(),'>'))
        # if DownKey:                         #on for dragon movement
        #     if dragonp:
        #         me.dr+=0.02

        if QuitKey:
            print("GAME OVER")
            return
        if sheildkey:
            sheildkey = False
            if sheildav:
                sheildp = 1
                sheildav = 0
                shstart = int(tik.elapsed)
       	#########################################################

        if t % 33 == 0 and bossm == 0:
            if t % 99 == 0:
                createobs(ob)
            elif (t + 33) % 99 == 0:
                createcoins(co)
            else:
                i = randint(0, 2)
                if i == 0:
                    createobs(ob)
                elif i == 1:
                    createpower(sp, 'P')
                else:
                    createpower(dra, 'D')
        
        if t%100==0:
            mag.append(spr(135,randint(5,35),'M'))
        

        if t>bt:                         ## boss mode on
            bossm=1
                
        if speedp == 1:
            speed = 2.5
        else:
            speed = 1.5
        
        if dragonp == 0:
            me.update()
            me.printp(sheildp)
        else:
            me.updatex()
            if t % 10 > 4:
                drag.append(dragon(me.getx(), me.gety() + t % 5, "D"))
            else:
                drag.append(dragon(me.getx(), me.gety() + 4 - t % 5, "D"))
        
        if bossm:                           ## boss mode on
            # bb holds the boss bullets
            if t % 4 == 0:
                # aim a bullet from the boss at the player's position
                bx = 95 - me.getx()
                by = 25 - me.gety()
                dis = floor(sqrt(bx**2 + by**2))
                vx = (bx / dis) * 2 + 1
                vy = (by / dis) * 2
                bb.append(bull(95, bsp.gety(), vx, vy, '*'))
            d = 0
            if me.gety() - bsp.gety() > 0:
                d = 1
            bsp.update(d)
            bsp.printb()
            
            for i in bu[:]:  # iterate over a copy: bullets are removed in place
                i.update(-2)
                if i.gety() > 35 or i.getx() > 143 or not i.valid:
                    bu.remove(i)
                else:
                    i.printsp(color.PURPLE)
            for i in bb[:]:  # boss bullets
                i.update()
                if i.valid:
                    i.printsp(color.RED)
                else:
                    bb.remove(i)
            
            # boss bullets hitting the player
            for i in bb[:]:
                if abs(me.getx() - i.getx()) < 2 and abs(me.gety() - i.gety()) < 1:
                    bb.remove(i)
                    lives -= 1
            # player bullets hitting the boss
            for i in bu[:]:
                k = (i.gety() - bsp.gety())
                if abs(95 - i.getx()) <= 2 and 0 <= k < 8:
                    bu.remove(i)
                    bossh -= 5
               
        else:            
            #coins/obstacles/powerup print
               
            for i in co[:]:
                i.update(speed)
                if i.valid:
                    i.printsp(color.BOLD + color.YELLOW)
                else:
                    co.remove(i)
            for j in ob[:]:
                for i in j:
                    i.update(speed)
                    if i.valid:
                        i.printsp(color.BOLD + color.RED)
                    else:
                        ob.remove(j)
                        break

            for i in sp[:]:
                i.update(speed)
                if i.valid:
                    i.printsp(color.DARKCYAN)
                else:
                    sp.remove(i)
            for i in dra[:]:
                i.update(speed)
                if i.valid:
                    i.printsp(color.PURPLE)
                else:
                    dra.remove(i)

            for i in bu[:]:  # iterate over a copy: bullets are removed in place
                i.update(-2)
                if i.gety() > 35 or i.getx() > 143 or not i.valid:
                    bu.remove(i)
                else:
                    i.printsp(color.PURPLE)

            # coin pickup
            for i in co[:]:
                if abs(me.getx() - i.getx()) < 2 and abs(me.gety() - i.gety()) < 1:
                    score += 1
                    co.remove(i)
            
            # collision with obstacles
            for j in ob[:]:
                for i in j:
                    if sheildp == 1:
                        continue
                    if abs(me.getx() - i.getx()) < 2 and abs(me.gety() - i.gety()) < 1:
                        ob.remove(j)
                        if dragonp == 0:
                            lives -= 1
                        dragonp = 0
                        break

            # bullets destroying obstacles
            for k in bu:
                for j in ob[:]:
                    for i in j:
                        if abs(k.getx() - i.getx()) < 1 and (k.gety() - i.gety() < 1):
                            ob.remove(j)
                            break

            # power-ups
            for i in sp[:]:
                if abs(me.getx() - i.getx()) < 1 and abs(me.gety() - i.gety()) < 1:
                    sp.remove(i)
                    spstart = int(tik.elapsed)
                    speedp = 1

            for i in dra[:]:
                if abs(me.getx() - i.getx()) < 1 and abs(me.gety() - i.gety()) < 1:
                    dra.remove(i)
                    dragonp = 1
            # magnet effect: pull the player toward the magnet
            for i in mag[:]:
                if i.valid == 0:
                    mag.remove(i)
                    continue
                i.printsp(color.DARKCYAN)
                disx = i.getx() - me.getx()
                disy = i.gety() - me.gety()
                dis = floor(sqrt(disx**2 + disy**2))
                if dis != 0:
                    me.upx((disx // dis) * 2)
                    me.upy((disy // dis) * 2)
                i.update(speed)
                
            # dragon body, while the dragon power-up is active
            for i in drag[:]:
                if i.valid == 0:
                    drag.remove(i)
                    continue
                i.printsp(color.GREEN)
                i.update()
            if dragonp == 0:
                drag = []
                    

        #sky and ground
        sky()
        if lives==0:
            print("GAME OVER")
            return      

        tik.toc()
        ct=int(tik.elapsed)
        # print("\033[41;0f {},{}".format(me.x,me.y))
        # print("\033[42;0f {},{}".format(me.velx,me.vely))
        print("\033[1;0f score:{}".format(score))
        print("\033[2;0f lives:{}".format(lives))
        if sheildav == 0:
            if 60 - (ct - shstart) >= 0:
                print("\033[3;0f shield:{} ({})".format(sheildav, 60 - (ct - shstart)))
            else:
                sheildav = 1
        else:
            print("\033[3;0f shield:{}  press t".format(sheildav))
        
        if sheildp:
            if 10 - (ct - shstart) <= 0:
                sheildp = 0


        if speedp:
            if 20-(ct-spstart)>=0:
                print("\033[4;0f speed:{} ({})".format(speedp,20-(ct-spstart)))
            else:
                speedp=0
        else:
            print("\033[4;0f speed:{}".format(speedp))

        print("\033[5;0f dragon:{}".format(dragonp))
            
        if bossm:
            print("\033[3;90f BOSS HEALTH:{}".format(bossh))

            if bossh < 1 :
                print("\033[10;80f YOU SAVED BABY YODA! :)")
                return
          
        
        time=200-ct
        if time == 0:
            print("TIME OVER")
            return
        print("\033[6;0f time:{}".format(time))
        if bossm == 0:
            print("\033[3;50f percentage:{}".format(int((t * 100) / bt)))
Exemple #26
    ] + new_user_sentence + [  # + is used since new_user_sentence variable is a list
        " ",
    ]

    if option == "f":  # the function will write the spelling report in the new file, if option is file
        for item in spell_check_report:
            item = item + "\n"  # the function will change line, i.e. by adding "\n", at the end of every line
            file.write(item)
    return spell_check_report


# this is the main spell checking program that uses the three functions
continue_check_spelling = True
while continue_check_spelling:
    time_recorded = TicToc()  # create a timer to measure how long the check takes
    time_recorded.tic()  # start timing
    print_lines = [
        " ",
        " Enter[F]. to check a [File] ",
        " ",
        " Enter[S]. to check a [Sentence] ",
        " ",
        " ",
        " Enter anything else to quit ",
        " ",
    ]
    format(print_lines, False, "  S P E L L   C H E C K E R")
    option = input("\u2517" + "\u2501" * 5 + " Enter choice: ").lower()

    # If the user entered more than one character (e.g. "File", "Sentence" or
    # "[F]"), they are asked to enter the option again
    if len(option) != 1:
Exemple #27
    def count_words(self):
        return self.__count_words(self.root)

    def __count_words(self, node, count=0):
        # every node marked as end-of-word contributes one word
        if node.isEndOftheWord:
            count += 1

        for nxt_node in node.childrens.values():
            count = self.__count_words(nxt_node, count)

        return count


t = Trie()
clock = TicToc()
# fetch the dwyl/english-words dictionary and keep only the words themselves
db = requests.get(
    'https://raw.githubusercontent.com/dwyl/english-words/master/words_dictionary.json'
).json().keys()

for d in db:
    t.insert(d)

clock.tic()
print(t.count_words())
clock.toc()

print(clock.elapsed)
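# --- For reference (assumption, not from the source): the Trie definition is
# truncated above. A minimal node shape consistent with the attribute names
# used by __count_words would be:
#
#     class TrieNode:
#         def __init__(self):
#             self.childrens = {}          # child nodes keyed by character
#             self.isEndOftheWord = False  # marks a complete word
#
#     # Trie.insert(word) would then walk/create one TrieNode per character
#     # and set isEndOftheWord = True on the last node.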
Exemple #28
path='/Users/trondkr/Dropbox/MON-data/'
files = [f for f in glob.glob(path + "EXPORTED/*.txt", recursive=False)]
totalfiles=len(files)
currentfile=0

nocounter = ['NORD1_2019', 'NORD2_2019', 'OFOT1_2019', 'OFOT2_2019', 'OKS1_2019', 'OKS2_2019',
             'SAG1_2019', 'SAG2_2019', 'SJON1_2019', 'SJON2_2019', 'TYS1_2019', 'TYS2_2019']

# Originally from an Excel spreadsheet; these files did not have a Serial counter
withcounter = ['OKS2_2018', 'NORD1_2018', 'NORD2_2018', 'OFOT1_2018', 'OFOT2_2018', 'OKS1_2018',
               'OKS2_2018', 'SAG1_2018', 'SAG2_2018', 'SJON1_2018', 'SJON2_2018', 'TYS1_2018', 'TYS2_2018']

header="Ser	Meas	Salinity	Temp	F	Opt	Opml	Density	Press	Date	Time	Lat	Lon	Depth\n"
t = TicToc()
t.tic()
print("Conversion starting\n")
for f in files:
    currentfile+=1
    filename_base=Path(f).resolve().stem
    newfile='{}_edited.txt'.format(path+filename_base)
    if os.path.exists(newfile):
        os.remove(newfile)
    out=open(newfile,'a')
    progress=(currentfile/totalfiles*1.0)*100.
    print("==> New file will be written to {} ({:3.2f}% done)".format(newfile,progress))
    infile=open(f,'r')
    lines=infile.readlines()
    counter=0
    first=True
    
Exemple #29
p_map.x = x0
p_map.sa2xp = model.sa2xp_y_xdot_timedaoa
# p_map.sa2xp = model.sa2xp_y_xdot_aoa
p_map.xp2s = model.xp2s_y_xdot

s_grid_height = np.linspace(0.5, 1.5, 7)
s_grid_velocity = np.linspace(3, 8, 7)
s_grid = (s_grid_height, s_grid_velocity)
a_grid_aoa = np.linspace(0/180*np.pi, 70/180*np.pi, 21)
# a_grid = (a_grid_aoa, )
a_grid_amp = np.linspace(0.9, 1.2, 11)
a_grid = (a_grid_aoa, a_grid_amp)

grids = {'states': s_grid, 'actions': a_grid}
t = TicToc()
t.tic()
Q_map, Q_F, Q_reach = vibly.parcompute_Q_map(grids, p_map, keep_coords=True,
                                             verbose=2)
t.toc()
print("time elapsed: " + str(t.elapsed/60))
Q_V, S_V = vibly.compute_QV(Q_map, grids)
S_M = vibly.project_Q2S(Q_V, grids, proj_opt=np.mean)
Q_M = vibly.map_S2Q(Q_map, S_M, s_grid, Q_V=Q_V)
# plt.scatter(Q_map[1], Q_map[0])
print("non-failing portion of Q: " + str(np.sum(~Q_F)/Q_F.size))
print("viable portion of Q: " + str(np.sum(Q_V)/Q_V.size))

import itertools as it
# Q0 = np.zeros((len(grids['states']), total_gridpoints))
# def create_x0(grids):
#     for idx, state_action in enumerate(np.array(list(
Exemple #30
def detect_motion(frameCount):
    # grab global references to outputFrame
    global outputFrame
    outputFrame = None

    #Create timer
    timer = TicToc()

    # initialize the motion detector and the total number of frames
    # read thus far
    md = SingleMotionDetector(accumWeight=0.2)
    total = 0
    record = True

    # Starts recording
    cap = cv2.VideoCapture(0)
    out = cv2.VideoWriter(filename, get_video_type(filename), 25,
                          get_dims(cap, res))

    # Starts Timer
    timer.tic()

    #Recording starts
    while (record is True):

        # Read video streaming frames and send it to video
        ret, frame = cap.read()
        out.write(frame)

        # read the next frame from the video stream, resize it,
        # convert the frame to grayscale, and blur it

        frame = imutils.resize(frame, width=400)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (7, 7), 0)

        # if the total number of frames has reached a sufficient
        # number to construct a reasonable background model, then
        # continue to process the frame
        if total > 32:
            # detect motion in the image
            motion = md.detect(gray)
            #If motion is detected, restart timer
            if (motion is not None):
                print("motion")
                timer.tic()

            if (timer.toc() is not None):
                # print(timer.toc())
                # Stop the video if motion has ceased for 5 s, a lock has been
                # placed by webRTC, or the user has deactivated the camera
                if ((motion is None and timer.toc() >= 5)
                        or (settings.lock is True)
                        or (settings.active is False)):
                    #Stops recording + Cleanup
                    cap.release()
                    out.release()
                    out = None
                    cap = None
                    md = None
                    total = 0
                    cv2.destroyAllWindows()
                    print("video Ends")
                    record = False
                    #Starts a new thread that will upload the video to Firebase
                    threading.Thread(target=upload).start()

                    break

        # update the background model and increment the total number
        # of frames read thus far
        md.update(gray)
        total += 1

        # set the output frame (the lock is currently disabled)
        # with lock:
        outputFrame = frame.copy()
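# --- Usage sketch (assumption, not from the source): detect_motion blocks
# while it records, so it is typically launched on its own thread, e.g.:
#
#     threading.Thread(target=detect_motion, args=(32,), daemon=True).start()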