Exemple #1
0
def merge_flights_history(mdate):
    """Merge each past month's daily flight parquets into one parquet file.

    Every entry listed under ``c.PATH_HISTORY`` whose name contains a
    ``YYYY_MM`` pattern, contains no dot, and sorts strictly before the month
    of ``mdate`` is handled as a monthly folder: the ``.parquet`` files inside
    it are concatenated, written to ``<folder>.parquet`` next to the folder,
    and the folder is then deleted.

    A monthly folder with no ``.parquet`` files is skipped (and kept) instead
    of crashing on ``pd.concat([])``.

    Args:
        mdate: Object supporting the ``%Y_%m`` format spec; folders for that
            month or later are left untouched.
    """

    vdp = get_vdropbox()
    current_month = f"{mdate:%Y_%m}"

    # Check for monthly folders and get all parquets inside
    for folder in vdp.ls(c.PATH_HISTORY):

        is_date_folder = re.search(r"\d{4}_\d{2}", folder)
        if not is_date_folder or "." in folder or folder >= current_month:
            continue

        log.info(f"Merging '{folder}' vflights history")

        sub_folder = f"{c.PATH_HISTORY}/{folder}"

        # Read all daily parquets
        dfs = [
            vdp.read_parquet(f"{sub_folder}/{file}")
            for file in vdp.ls(sub_folder)
            if file.endswith(".parquet")
        ]

        if not dfs:
            # pd.concat raises ValueError on an empty list; leave the folder
            # in place so nothing is deleted without having been merged.
            log.warning(f"No parquets found in '{folder}', skipping merge")
            continue

        # Export it as only one parquet file
        df = pd.concat(dfs)
        vdp.write_parquet(df, f"{sub_folder}.parquet")
        log.success(f"Successfuly merged '{folder}' vflights history")

        # Delete original folder
        vdp.delete(sub_folder)
Exemple #2
0
def weights_selection(models):
    """Prompt for one weight per model and return the weights as floats.

    The weights are returned in the same order as ``models``. Each answer is
    read from standard input and converted with ``float``.
    """
    log.success('|SELECT THE WEIGHTS FOR THE MODELS|')

    def ask_weight(model):
        # One prompt line followed by one line of input per model.
        log.success('select the weights for: {}'.format(model))
        return float(input())

    return [ask_weight(model) for model in models]
Exemple #3
0
def show(title, options, can_exit=True, main_menu=False, decorator='++'):
    """
    Display a menu and run the action selected by the user.

    title: text shown in the header, surrounded by `decorator`
    options: dictionary in which each key is a string and each value is a tuple (string, function), representing
            the text of the function that will be called when the related string in inserted as input
        ex: { 'a': ('option a', print) } : print 'option a' and when 'a' is pressed, call the function 'print'
    can_exit: when True, an extra '(x) Exit' entry is shown and 'x' leaves the menu
    main_menu: forwarded to quit_menu() when the menu is left
    decorator: string printed on both sides of the title

    Returns the value returned by the selected function, or None when the user exits.
    Input is asked again until a valid key is entered.
    """
    log.success('{} {} {}'.format(decorator, title, decorator))
    for key, entry in options.items():
        log.warning('({}) {}'.format(key, entry[0]))
    if can_exit:
        log.warning('(x) Exit')

    while True:
        arg = input()
        print()

        if arg == 'x' and can_exit:
            quit_menu(main_menu)
            return None

        # Only the lookup is guarded: a KeyError raised inside the selected
        # action must propagate instead of being reported as a bad choice.
        try:
            funct = options[arg][1]
        except KeyError:
            log.error('Invalid option, retry:')
            continue

        res = funct()
        quit_menu(main_menu)
        return res
Exemple #4
0
def create_one_report(dfs, mdate):
    """Extract the data for the month of ``mdate`` and build its report.

    The extraction is run with ``export_data=False`` and its result is handed
    straight to the report builder.
    """
    monthly_data = extract_data.main(dfs, mdate, export_data=False)
    create_report.main(mdate, data=monthly_data)
    log.success(f"Report {mdate:%Y-%m} created")
def cluster_ensemble(clip, path_sparse, path_dense):
    """Combine two CSV files into one, split by playlist cluster.

    Playlists are split into two clusters by interaction count using ``clip``
    as the threshold. Rows of the CSV at ``path_sparse`` are kept for target
    playlists of the first cluster, rows of the CSV at ``path_dense`` for
    target playlists of the second one. The concatenation is written under
    ``submissions/`` with a time-stamped file name.
    """
    sparse_pl, dense_pl = cluster.cluster_users_by_interactions_count(clip=clip)

    log.success('Cluster 1 (interactions count <= {}): {} playlists'.format(clip, len(sparse_pl)))
    log.success('Cluster 2 (interactions count  > {}): {} playlists'.format(clip, len(dense_pl)))

    # Keep only the target playlists of each cluster
    targets = set(data.get_target_playlists())
    sparse_targets = pd.DataFrame({'playlist_id': list(set(sparse_pl) & targets)})
    dense_targets = pd.DataFrame({'playlist_id': list(set(dense_pl) & targets)})

    # merge() with no arguments joins on the columns the two frames share
    parts = [
        pd.read_csv(path_sparse).merge(sparse_targets),
        pd.read_csv(path_dense).merge(dense_targets),
    ]

    out_path = 'submissions/cluster_ensemble' + t.strftime('_%H-%M-%S')
    pd.concat(parts).to_csv(path_or_buf=out_path, index=False)
Exemple #6
0
        def on_packet(packet: bytes) -> None:
            """Handle one incoming file chunk and update the receive state.

            The first four bytes of ``packet`` are unpacked as the chunk index
            (``struct`` format ``"i"``); the remaining bytes are the payload.
            A packet whose index differs from the expected ``chunk_index`` is
            ignored. Otherwise the payload is appended to ``file_bytes`` and
            ``connection.status`` is set to report either the received chunk
            or, for the last chunk, the MD5 verdict.
            """
            global should_close, chunk_index, file_bytes

            # Split the 4-byte index header from the payload.
            index, packet = packet[:4], packet[4:]
            index, = struct.unpack("i", index)

            # Only the chunk currently being waited for is accepted.
            if index != chunk_index:
                return

            file_bytes += packet

            log(f"Received chunk {chunk_index}")

            if chunk_index == chunks_count - 1:
                # Basically, MD5 check is redundant, as we've guaranteed
                # the correct order of the individual packets and their
                # integrity
                if md5 == hashlib.md5(file_bytes).digest():
                    connection.status = b"md5 ok"

                    # The file is written only after the digest matched.
                    with open(file_name, "wb") as file:
                        file.write(file_bytes)

                    log.success("Receive successful")
                    should_close = True
                else:
                    connection.status = b"md5 error"
                    log.error("MD5 error", file=sys.stderr)
            else:
                # Report the received index, then wait for the next chunk.
                connection.status = b"received " + struct.pack(
                    "i", chunk_index)
                chunk_index += 1
Exemple #7
0
def upush_summary(update_repo_path, dest_base, update_files, update_fails, temp_path, need_sync=None, debug=False):
    """Print the summary of an update push and, if given, the trees to sync.

    An ``UpdatePusher`` is built from the arguments only to print its summary.
    When ``need_sync`` is non-empty, the base names of its entries are listed
    followed by a reminder to push the update repo at ``update_repo_path``.
    """
    pushupdate.UpdatePusher(
        update_repo_path,
        dest_base,
        update_files=update_files,
        fails=update_fails,
        temp_path=temp_path,
        debug=debug,
    ).print_summary()

    if not need_sync:
        return

    log.success("Updated trees (might need sync):")
    tree_names = map(os.path.basename, need_sync)
    helpers.print_list(tree_names, nl_after=True)

    reminder = log.term.warn("Don't forget to push changes to update repo:")
    for line in (reminder, update_repo_path, ""):
        log.info(line)
Exemple #8
0
def option_selection_evaluation_2():
    """Ask whether to save the matrix, evaluate it, or create the CSV.

    Returns:
        's', 'e' or 'c', taken from the first character of the typed line.

    Any other answer, including an empty line, logs 'wrong mode' and exits
    the process with status 0.
    """
    log.success('|EVALUATE OR SAVE THE MATRIX?|')
    log.warning('\'s\' save the matrix')
    log.warning('\'e\' evaluate the matrix')
    log.warning('\'c\' create the CSV')

    # Slice instead of indexing: an empty line then falls through to the
    # 'wrong mode' branch rather than raising IndexError.
    selection = input()[:1]
    if selection in ('s', 'e', 'c'):
        return selection

    log.info('wrong mode')
    exit(0)
Exemple #9
0
def urls():
    """Log every match of ``url_regex`` found in the default response text."""
    found = findall(url_regex, context.default_txt())

    if not found:
        log.fail('No URLs')
        return

    log.success('URLs:')
    for match in found:
        log.info(match, indent=1)
Exemple #10
0
def resources():
    """Log the resources (e.g. /api/v2) found in the default response text.

    For each match of ``resource_regex`` the element at index 1 is printed.
    """
    matches = findall(resource_regex, context.default_txt())

    if not matches:
        log.fail('No Resources')
        return

    log.success('Resources:')
    for match in matches:
        log.info(match[1], indent=1)
Exemple #11
0
def comments():
    """Log every HTML comment found in the default response text."""

    def is_comment(text):
        # BeautifulSoup exposes comments as Comment string nodes.
        return isinstance(text, Comment)

    parsed = BeautifulSoup(context.default_txt(), 'html.parser')
    found = parsed.find_all(string=is_comment)

    if not found:
        log.fail('No comments')
        return

    log.success('Comments:')
    for comment in found:
        log.info(comment, indent=1)
Exemple #12
0
    def save_r_hat(self, evaluation):
        """Save ``self.W_sparse`` (converted to CSR) as a time-stamped npz.

        The file goes under ``raw_data/saved_r_hat_evaluation/`` when
        ``evaluation`` is truthy and under ``raw_data/saved_r_hat/``
        otherwise; the directory is created if it does not exist.
        """
        matrix = check_matrix(self.W_sparse, format='csr')

        if evaluation:
            template = 'raw_data/saved_r_hat_evaluation/{}_{}'
        else:
            template = 'raw_data/saved_r_hat/{}_{}'
        target = template.format(self.name, time.strftime('%H-%M-%S'))

        # create dir if not exists
        os.makedirs(os.path.dirname(target), exist_ok=True)

        sps.save_npz(target, matrix)
        log.success('R_hat succesfully saved in: {}.npz'.format(target))
Exemple #13
0
def option_selection_evaluation(type):
    """Ask the user what to do with the computed matrix and collect inputs.

    Args:
        type: 'SIM' or 'R_HAT'. The name shadows the builtin but is kept so
            that keyword callers keep working.

    Returns:
        For 'SIM': ``(name, urm_filter_tracks, rel_path)``; ``name`` and
        ``rel_path`` are None when MAP@10 computation is chosen.
        For 'R_HAT': ``(name, urm_filter_tracks, rel_path, EXPORT)``;
        ``rel_path`` is None when exporting.
        For any other ``type`` nothing is asked and None is returned.

    An unrecognised answer, including an empty line, logs 'wrong mode' and
    exits the process with status 0.
    """
    if type == 'SIM':
        # LET USER CHOOSE OPTIONS
        log.success('STUDY HARD | WORK HARD | F**K HARD |')
        log.warning('\'s\' for save the r_hat in saved_r_hat_evaluation')
        log.warning('\'m\' for compute the MAP@10')
        # Slice instead of indexing so an empty line is handled as an
        # invalid option instead of raising IndexError.
        option = input()[:1]

        if option == 's':
            urm_filter_tracks = data.get_urm_train_1()
            rel_path = 'saved_r_hat_evaluation/'
            log.success('SELECT A NAME FOR THE MATRIX')
            name = input()
        elif option == 'm':
            urm_filter_tracks = data.get_urm_train_1()
            rel_path = None
            name = None
        else:
            # Report the actual problem, as the other selectors do.
            log.error('wrong mode')
            exit(0)
        return name, urm_filter_tracks, rel_path
    elif type == 'R_HAT':
        # LET USER CHOOSE OPTIONS
        log.success('STUDY HARD | WORK HARD | F**K HARD |')
        log.warning('\'s\' for save the r_hat in saved_r_hat')
        log.warning('\'e\' for EXPORT and get a SUB')
        option = input()[:1]

        if option == 's':
            log.success('SELECT A NAME FOR THE MATRIX')
            name = input()
            urm_filter_tracks = data.get_urm()
            rel_path = 'saved_r_hat/'
            EXPORT = False
        elif option == 'e':
            log.success('SELECT A NAME FOR THE SUB')
            name = input()
            urm_filter_tracks = data.get_urm()
            rel_path = None
            EXPORT = True
        else:
            log.error('wrong mode')
            exit(0)
        return name, urm_filter_tracks, rel_path, EXPORT
Exemple #14
0
def user_agents():
    """Request the target once per User-Agent and flag size differences.

    The length of each response body is compared with the default response
    length; agents producing a different length are reported as successes,
    the others as failures.
    """
    baseline = context.default_len()
    log.info(f'Standard Response Length: {baseline}')

    for agent in user_agents_list:
        response = context.session.get(context.url, headers={'User-Agent': agent})
        size = len(response.text)

        if size == baseline:
            log.fail(agent, indent=1)
            log.fail('Default length', indent=2)
            continue

        log.info(agent, indent=1)
        log.success(f'Response size is different: {size}', indent=2)
Exemple #15
0
def test_post():
    """Send a POST to the target URL and report how the server reacts.

    A 501 is reported as a failure, any status other than 200 as
    informational. For a 200 the body length is additionally compared with
    the default (GET) response length.
    """
    response = context.session.post(context.url)
    status = response.status_code

    if status == 501:
        log.fail('POST request throws Code 501 (Unsupported Method)')
        return

    if status != 200:
        log.info(f'POST returns Code {status} - could be something there')
        return

    log.success('POST accepted!')

    post_length = len(response.text)
    if post_length == context.default_len():
        log.fail('GET and POST responses are of same length', indent=1)
    else:
        log.success('GET and POST responses are of different lengths!', indent=1)
Exemple #16
0
def get_jwts():
    """Find JWTs in the full default response and log their decoded parts.

    Each match of ``jwt_regex`` is logged, followed by the decoded bytes of
    every dot-separated section except the last one (the signature).
    Sections are decoded as unpadded base64url, the encoding JWTs use; a
    section that cannot be decoded is reported and skipped.
    """
    # Function-scope import: the URL-safe decoder is needed because JWT
    # sections use '-' and '_' instead of '+' and '/'.
    from base64 import urlsafe_b64decode

    response = get_full_response(context.default_req)

    jwts = findall(jwt_regex, response)

    if not jwts:
        log.fail('No JWTs')
        return

    log.success('JWTs found:')

    for jwt in jwts:
        log.success(jwt, indent=1)

        # The last section of a JWT is the signature
        for section in jwt.split('.')[:-1]:
            # JWTs drop the trailing '=' padding; restore it so sections
            # whose length is not a multiple of four can be decoded.
            padded = section + '=' * (-len(section) % 4)
            try:
                decoded = urlsafe_b64decode(padded)
            except ValueError:
                # binascii.Error is a ValueError subclass; a false-positive
                # regex match must not abort the whole listing.
                log.fail('Section is not valid base64url', indent=2)
                continue
            log.info(decoded, indent=2)
Exemple #17
0
def redirects():
    """Log each redirect in the default request's history and inspect it.

    Every history entry is printed as its status code (in red, padded to 20
    characters) followed by its URL, and the URL is then passed to
    ``redirect`` for parameter analysis.
    """
    # Grab the request's history
    hops = context.default_req.history

    if not hops:
        log.fail('No redirects')
        return

    log.success('Redirects:')

    for hop in hops:
        coloured_status = Fore.RED + str(hop.status_code) + Fore.RESET
        line = coloured_status.ljust(20, ' ') + hop.url

        log.info(line, indent=1)

        redirect(hop.url)
Exemple #18
0
    def fit(self, clip=7):
        """Split the target playlists into a sparse and a dense cluster.

        Playlists are clustered by interaction count with ``clip`` as the
        threshold; the target playlists of each cluster are stored in
        ``self.sparse_pl`` and ``self.dense_pl`` as lists.
        """
        sparse_all, dense_all = cluster.cluster_users_by_interactions_count(clip=clip)

        log.success('Cluster 1 (interactions count <= {}): {} playlists'.format(clip, len(sparse_all)))
        log.success('Cluster 2 (interactions count  > {}): {} playlists'.format(clip, len(dense_all)))

        # Keep only the target playlists of each cluster
        sparse_set = set(sparse_all)
        dense_set = set(dense_all)
        targets = set(data.get_target_playlists())
        self.sparse_pl = list(sparse_set & targets)
        self.dense_pl = list(dense_set & targets)
Exemple #19
0
def normalization_mode_selection():
    """Ask the user for a normalization mode and return its identifier.

    Returns:
        'MAX_MATRIX', 'MAX_ROW', 'L2' or 'NONE', for the answers '1' to '4'
        (only the first typed character is considered).

    Any other answer, including an empty line, logs 'wrong mode' and exits
    the process with status 0.
    """
    log.success('|SELECT THE NORMALIZATION MODE|')
    log.warning('\'1\' MAX MATRIX')
    log.warning('\'2\' MAX ROW')
    log.warning('\'3\' L2 NORM')
    log.warning('\'4\' NONE')

    modes = {
        '1': 'MAX_MATRIX',
        '2': 'MAX_ROW',
        '3': 'L2',
        '4': 'NONE',
    }

    # Slice instead of indexing so an empty line is reported as a wrong
    # mode instead of raising IndexError.
    selection = input()[:1]
    if selection not in modes:
        log.error('wrong mode')
        exit(0)
    return modes[selection]
Exemple #20
0
def print_summary(good, fails, good_s, fail_s):
    """Print the successful and the failed updates as two lists.

    ``good`` holds either 2-item entries (update file, update) or 3-item
    entries (update file, update, builds); for the latter the builds are
    printed one per line in place of the update. ``fails`` holds
    (update file, error) pairs. ``good_s`` and ``fail_s`` are the words used
    in the two headings.
    """
    good_count, fail_count = len(good), len(fails)

    if good:
        log.success("\n%d updates %s:" % (good_count, good_s))
        good_fmt = '{t.bold}{upf}:{t.normal}\n{up}'

        def format_pair(entry):
            return good_fmt.format(t=log.term, upf=entry[0], up=entry[1])

        # The shape of the first entry decides how all entries are rendered.
        if len(good[0]) == 2:
            good_lines = map(format_pair, good)
        else:
            good_lines = [
                good_fmt.format(t=log.term, upf=upf, up='\n'.join(map(str, builds)))
                for upf, _update, builds in good
            ]
        helpers.print_list(good_lines)

    if fails:
        log.error("\n%s updates %s:" % (fail_count, fail_s))
        fail_fmt = "{t.warn}{upf}:{t.normal} {err}"

        def format_failure(entry):
            return fail_fmt.format(t=log.term, upf=entry[0], err=str(entry[1]))

        helpers.print_list(map(format_failure, fails))
Exemple #21
0
def extract_gcal_confusions(exclude_other=True,
                            merge_study=True,
                            min_alpha=0.1):
    """Look for confusions in the google calendar data and export them.

    The calendar parquet is cleaned, turned into a confusion matrix over the
    "summary" text and the "calendar" category, and filtered with
    ``min_alpha``. If any row remains, the result is written as an excel file
    to ``PATH_CONFUSIONS``.
    """
    vdp = get_vdropbox()

    calendar_df = vdp.read_parquet(PATH_GCAL_DATA)
    cleaned_df = clear_potential_confusions(calendar_df, exclude_other, merge_study)
    matrix_df = get_confusion_matrix(
        cleaned_df,
        col_text="summary",
        col_category="calendar",
    )
    confusions_df = filter_confusions(matrix_df, min_alpha)

    num_confusions = confusions_df.shape[0]

    if num_confusions <= 0:
        log.success("There are no confusions in google calendar")
        return

    log.warning(
        f"There are {num_confusions} in google calendar. Exporting them")
    vdp.write_excel(confusions_df, PATH_CONFUSIONS)
Exemple #22
0
def redirect(url):
    """Inspect the GET parameters of a redirect URL for suspicious values.

    Each parameter value is URL-decoded and matched, in order, against
    ``url_regex``, ``resource_regex`` and ``file_regex``; the first pattern
    that matches is reported and the remaining ones are skipped for that
    parameter.

    Parameters are split on '&' and '=' *before* decoding, and only the first
    '=' separates name from value, so values that contain '=' or an encoded
    '&' (typically an embedded URL) and parameters without a value no longer
    raise ValueError.
    """
    if '?' not in url:
        log.fail('Redirect contains no GET parameters', indent=2)
        return

    # Everything after the first '?' is the query string.
    query = url.split('?', 1)[1]

    checks = (
        (url_regex,
         'Redirect appears to contain a URL - possible RFI or use of another URL scheme?'),
        (resource_regex,
         'Redirect appears to contain a resource - possible LFI?'),
        (file_regex,
         'Redirect appears to contain a filename - possible LFI?'),
    )

    for raw_param in query.split('&'):
        # partition never fails: a parameter without '=' has an empty value.
        _name, _sep, raw_value = raw_param.partition('=')

        # URL Decode
        value = unquote(raw_value)

        for pattern, message in checks:
            if re.match(pattern, value):
                log.success(message, indent=2)
                break
Exemple #23
0
            # Send the chunks one at a time, in order.
            for chunk_index in range(chunks_count):
                log(f"Sending chunk {chunk_index + 1}/{chunks_count}")

                # Keep resending the current chunk until the remote status
                # is "received <index>" for it, or an MD5 verdict is reported.
                status = connection.remote_status
                while status != b"received " + struct.pack("i", chunk_index) and \
                        status != b"md5 ok" and status != b"md5 error":
                    connection.status = b"packet " + struct.pack(
                        "i", chunk_index)
                    # Packet layout: packed chunk index followed by the
                    # chunk_size-long slice of the file for that index.
                    chunk = struct.pack(
                        "i",
                        chunk_index) + file_bytes[chunk_index * chunk_size:
                                                  (chunk_index + 1) *
                                                  chunk_size]
                    connection.send_message(b"packet", chunk)
                    status = connection.remote_status

            # check md5
            # NOTE(review): `status` is not refreshed inside this loop, so it
            # only terminates if the last status read above is already an MD5
            # verdict - confirm that is always the case.
            log("Checking MD5")
            while True:
                if status == b"md5 ok":
                    log.success("Send successful")
                    exit(0)

                if status == b"md5 error":
                    log.error("MD5 mismatch")
                    break

            # Reset the receiver
            while not connection.remote_status == b"header received":
                connection.send_message(b"reset", b"")
Exemple #24
0
def export_csv_wizard(recommendations):
    """Ask for a file name and export ``recommendations`` as a CSV."""
    log.info('Choose a name for the CSV:')
    chosen_name = input()
    exportcsv(recommendations, name=chosen_name)
    log.success('CSV saved!')
Exemple #25
0
def wizard_hybrid():
    """Interactive wizard that builds a hybrid recommender from saved matrices.

    The matrices, their source folder and the model names are loaded through
    ``hb.create_matrices_array()``. The folder decides the flow:

    * a "save" folder: ask for weights, then save the hybrid R^ or export
      the recommendations as CSV;
    * an "evaluation" folder: either run the bayesian search validation or
      use hand-crafted weights to save, evaluate or export.

    Any other folder is reported as an error. Nothing is returned.
    """
    # Folder names grouped by matrix kind and by purpose.
    SIM_MATRIX = ['saved_sim_matrix', 'saved_sim_matrix_evaluation']
    R_HAT = ['saved_r_hat', 'saved_r_hat_evaluation']
    SAVE = ['saved_sim_matrix', 'saved_r_hat']
    EVALUATE = ['saved_sim_matrix_evaluation', 'saved_r_hat_evaluation']

    start = time.time()

    matrices_array, folder, models = hb.create_matrices_array()

    print('matrices loaded in {:.2f} s'.format(time.time() - start))
    log.success('You have loaded: {}'.format(models))

    NORMALIZATION_MODE = normalization_mode_selection()

    if folder in SAVE:
        WEIGHTS = weights_selection(models)

        # NOTE(review): option_selection_save is not defined in the visible
        # source - confirm it exists and returns these tuple shapes.
        if folder in SIM_MATRIX:
            name, urm_filter_tracks, rel_path = option_selection_save('SIM')
            hybrid_rec = HybridSimilarity(
                matrices_array,
                normalization_mode=NORMALIZATION_MODE,
                urm_filter_tracks=urm_filter_tracks)
            sps.save_npz('raw_data/' + rel_path + name,
                         hybrid_rec.get_r_hat(weights_array=WEIGHTS))
        if folder in R_HAT:
            name, urm_filter_tracks, rel_path, EXPORT = option_selection_save(
                'R_HAT')
            hybrid_rec = HybridRHat(matrices_array,
                                    normalization_mode=NORMALIZATION_MODE,
                                    urm_filter_tracks=urm_filter_tracks)
            if EXPORT:
                N = ask_number_recommendations()
                recommendations = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                exportcsv(recommendations, path='submission', name=name)
            else:
                sps.save_npz('raw_data/' + rel_path + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))

    elif folder in EVALUATE:
        log.success('|WHAT YOU WANT TO DO ???|')
        log.warning('\'1\' BAYESIAN SEARCH VALIDATION')
        log.warning('\'2\' HAND CRAFTED WEIGHTS')
        # NOTE(review): indexing raises IndexError on an empty line, and an
        # answer other than '1' or '2' falls through without any message.
        mode = input()[0]

        # BAYESIAN SEARCH
        if mode == '1':
            log.success(
                '|SELECT A NUMBER OF |||ITERATIONS||| FOR THE ALGORITHM|')
            # NOTE(review): the iteration count is parsed as a float -
            # confirm validate() accepts a non-integer value.
            iterations = float(input())
            urm_filter_tracks = data.get_urm_train_1()
            if folder in SIM_MATRIX:
                hybrid_rec = HybridSimilarity(
                    matrices_array,
                    normalization_mode=NORMALIZATION_MODE,
                    urm_filter_tracks=urm_filter_tracks)
            if folder in R_HAT:
                hybrid_rec = HybridRHat(matrices_array,
                                        normalization_mode=NORMALIZATION_MODE,
                                        urm_filter_tracks=urm_filter_tracks)
            hybrid_rec.validate(iterations=iterations,
                                urm_test=data.get_urm_test_1(),
                                userids=data.get_target_playlists())

        # MANUAL WEIGHTS
        elif mode == '2':
            WEIGHTS = weights_selection(models)
            urm_filter_tracks = data.get_urm_train_1()
            chose = option_selection_evaluation_2()  # save, evaluate or csv
            if chose == 's':
                log.success('|CHOSE A NAME FOR THE MATRIX...|')
                name = input()
                # `type` shadows the builtin for the rest of this function.
                if folder in SIM_MATRIX:
                    type = 'SIM'
                    hybrid_rec = HybridSimilarity(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                elif folder in R_HAT:
                    type = 'R_HAT'
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)

                # The R^ is saved under saved_r_hat_evaluation for both
                # matrix kinds; a second R^ built on the second train split
                # goes to saved_r_hat_evaluation_2.
                sps.save_npz('raw_data/saved_r_hat_evaluation/' + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))
                sym_rec = symmetric_recommender_creator(
                    models,
                    type,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                sps.save_npz('raw_data/saved_r_hat_evaluation_2/' + name,
                             sym_rec.get_r_hat(weights_array=WEIGHTS))

            elif chose == 'e':
                if folder in SIM_MATRIX:
                    type = 'SIM'
                    hybrid_rec = HybridSimilarity(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                elif folder in R_HAT:
                    type = 'R_HAT'
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                N = ask_number_recommendations()
                print('Recommending...')
                recs = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                hybrid_rec.evaluate(recommendations=recs,
                                    test_urm=data.get_urm_test_1())

                # export the recommendations
                log.success(
                    'Do you want to save the CSV with these recomendations? (y/n)'
                )
                if input()[0] == 'y':
                    export_csv_wizard(recs)

                # Repeat the evaluation on the second train/test split.
                # N is not passed here, so recommend_batch uses its own
                # default for the number of recommendations.
                sym_rec = symmetric_recommender_creator(
                    models,
                    type,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                recs2 = sym_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists())
                sym_rec.evaluate(recommendations=recs2,
                                 test_urm=data.get_urm_test_2())

            elif chose == 'c':
                # CSV creation is only available for R^ matrices.
                if folder in R_HAT:
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                    N = ask_number_recommendations()
                    print('Recommending...')
                    recs = hybrid_rec.recommend_batch(
                        weights_array=WEIGHTS,
                        target_userids=data.get_target_playlists(),
                        N=N)

                    export_csv_wizard(recs)
                else:
                    log.error('not implemented yet')
    else:
        log.error('WRONG FOLDER')
Exemple #26
0
def ask_number_recommendations():
    """Ask how many recommendations to produce.

    Returns:
        The integer typed by the user, or 10 when the line is empty or only
        whitespace, as the prompt announces.

    Raises:
        ValueError: if a non-empty answer is not a valid integer.
    """
    log.success('Select the number of recommendations (default: 10)')
    answer = input().strip()
    # The prompt promises a default, so an empty answer must not reach int().
    return int(answer) if answer else 10
Exemple #27
0
                        shrink=shrink,
                        threshold=threshold,
                        implicit=implicit,
                        alpha=alpha,
                        beta=beta,
                        l=l,
                        c=c,
                        export=False)


"""
If this file is executed, test the SPLUS distance metric
"""
if __name__ == '__main__':
    print()
    log.success('++ What do you want to do? ++')
    log.warning('(t) Test the model with some default params')
    log.warning('(r) Save the R^')
    log.warning('(s) Save the similarity matrix')
    #log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    arg = input()[0]
    print()

    model = CFUserBased()
    if arg == 't':
        # recs = model.recommend_batch(userids=data.get_target_playlists(), urm=data.get_urm_train())
        # model.evaluate(recommendations=recs, test_urm=data.get_urm_test())
        model.test(distance=CFUserBased.SIM_SPLUS,
                   k=600,
                   alpha=0.25,
Exemple #28
0
                    action='store_true')

args = parser.parse_args()

# Set the Context
context.url = fix_url(args.url)
context.file = fix_filepath(args.output)

context.session = Session()

# NOTE(review): the default request is grabbed before the headers, cookies
# and auth below are applied to the session - confirm this order is intended.
context.default_req = grab(text=False)

# NOTE(review): the condition reads `args.agent` but the value comes from
# `args.user` - verify both against the parser definition.
if args.agent:
    context.session.headers.update({'User-Agent': args.user})

if args.cookies:
    context.session.cookies.update(cookie_string_to_dict(args.cookies))

if args.hide:
    context.hide_fail = True

# Auth is set only when both a username and a password were given.
if args.username and args.password:
    context.session.auth = (args.username, args.password)

# Log it all
log.success(f'Analysing {context.url}')
log.success(f'Saving output to {context.file}')

# Execute the different modules
recon.execute(not args.nagent)