Example #1
def main():
    # output some information
    print("=====================================================")
    print("Welcome to Letterboxd Friend Ranker!")
    print("Instructions: This program compares your and")
    print("your friends' film taste. Once all the data has")
    print("been scraped and scores have been computed,")
    print("a report will be generated. The lower the avg.")
    print("difference, the better. If you and a friend do")
    print("not share at least 30 watched films, a score will")
    print("not be computed.")
    print("=====================================================\n")

    # prompt for info
    file_name = ""
    username = input("Enter your Letterboxd username: ")
    # NOTE: the line that defined `current_user` was corrupted in the source
    # ("******" redaction); a plain username placeholder is assumed here.
    current_user = username
    print("Scraping friend data...\n")
    friends = sc.scraper(username)

    print("Computing scores...\n")
    # compute commonality for each friend
    results = commonality(current_user, friends)

    # write report
    print("Generating report...\n")
    gr.generate_report(results, current_user)

    print("Done! View the report in the current directory!")
Example #2
def main(file):

    clean = input("""
    Clean up old report files?\n

    This will NOT delete old files (i.e. previously sourced data) but move them to a directory at your current location.
    (If you don't do this and choose option 1 at the next prompt, your new report data will be mixed with the old.)\n

    Usage:
        Use this if you have previously run this application and are running it again with the intention of
        specifying a new date-range, i.e. you are going to choose option 1 at the next prompt.

    Type y or n and press <Enter> [y/n] """)

    if clean == 'y':
        clean_up()
        print("----------------NEXT CHOICE-----------------")

    choice = choose()
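    # choose() (inferred) returns 1 for a full run (parse the endpoints file,
    # generate and run the collection script, then report) or 2 to rebuild the
    # report from previously sourced data.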

    if choice == 1:
        prefix = input("What kind of report are you running: 'metered' customers or 'control' customers: ")
        endpoints = parser(file)
        generate_script(endpoints)
        run_report()
        generate_report(prefix)
    elif choice == 2:
        prefix = input("What kind of report are you running: 'metered' customers or 'control' customers: ")
        generate_report(prefix)
Example #3
def sort_camera_trap_images(unsorted_dir):
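    # Pipeline (inferred from the calls below): locate the Snapcat JSON
    # database inside the unsorted directory, group images into bursts,
    # auto-segment and auto-label them, have the user confirm labels per
    # burst, and finally generate the report.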

    json_name = os.path.dirname(unsorted_dir) + ".json"
    snapcat_database_dir = os.path.join(unsorted_dir, json_name)
    snapcat_json = json_database.JSONDatabase(snapcat_database_dir)

    burst.create_bursts(snapcat_json, unsorted_dir)
    segmentation.segment_images(snapcat_json)
    label_images.label_images(snapcat_json)

    user_label_image.user_label_images_burst(snapcat_json)

    generate_report.generate_report(snapcat_json, unsorted_dir)
Example #4
def main(app_name):
    print("=== HPA Test ===")
    print("--- Start to Run K8sHPA Test x %d and Federator.ai Test x %d ---" % (
        number_k8shpa, number_alamedahpa))
    start_time = time.time()
    test_case_list = get_test_case_list()
    for i, test_case in enumerate(test_case_list):
        run_scenario(test_case, app_name, i)

    generate_report(["table"])
    end_time = time.time()
    duration = (end_time - start_time) / 60
    print("It takes %d minutes" % duration)
Example #5
def reports():
    # Begin report generation
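    # Note: dfd, rpath, and report_dict are assumed to be module-level names;
    # this function appears to be a fragment of the larger script shown later
    # (Example #9).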
    groups = dfd.groupby('var_type')
    from plot import plot_dists
    for name, group in groups:
        plot_dists(group.sstop - group.sstart, name,
                rpath)
    generate_report(report_dict)
    type_count = dfd.groupby('var_type').agg(lambda x:
            x.shape[0]).loc[:, ['chr']]
    var_percent = type_count.ix[:,0]/float(dfd.shape[0])*100
    type_count['var_percent'] = var_percent
    print(type_count)
    report_dict['type_counts'] = type_count.to_html()
Example #6
def run(self):
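    # Crawl loop (inferred): pull URLs from the frontier until it is empty,
    # download and scrape each one, queue the newly discovered links, mark the
    # URL complete, and sleep for the configured politeness delay. The report
    # is generated once the frontier runs dry.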
    while True:
        tbd_url = self.frontier.get_tbd_url()
        if not tbd_url:
            self.logger.info("Frontier is empty. Stopping Crawler.")
            generate_report()
            break
        resp = download(tbd_url, self.config, self.logger)
        self.logger.info(f"Downloaded {tbd_url}, status <{resp.status}>, "
                         f"using cache {self.config.cache_server}.")
        scraped_urls = scraper(tbd_url, resp)
        for scraped_url in scraped_urls:
            self.frontier.add_url(scraped_url)
        self.frontier.mark_url_complete(tbd_url)
        time.sleep(self.config.time_delay)
Example #7
def sort_camera_trap_images(unsorted_dir):
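    # Variant of Example #3 (inferred): the segmentation and automatic labeling
    # steps are commented out, so only burst creation, manual labeling, and
    # report generation actually run here.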

    json_name = os.path.basename(os.path.dirname(unsorted_dir)) + ".json"
    snapcat_database_dir = os.path.join(unsorted_dir, json_name)
    snapcat_json = json_database.JSONDatabase(snapcat_database_dir)

    burst.create_bursts(snapcat_json, unsorted_dir)
    #segmentation.segment_images( snapcat_json )

    #label_images( snapcat_json )

    # TODO make sure this is working well to deliver to Island conservation.
    # make sure the label is saved in the dataset
    user_label_image.user_label_images_burst(snapcat_json)

    generate_report.generate_report(snapcat_json, unsorted_dir)
Example #8
def run(daysago):
    """A simple program that reports on your daily desktop app usage."""
    cronstatus = setup_cron()
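    # Flow (inferred): setup_cron() above installs the daily cron job if it is
    # not already present; below we resolve the requested date (today minus
    # daysago), build the report for that date, and open it only if data exists.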
    if not cronstatus:
        click.echo('Cronjob already setup')
    else:
        click.echo(click.style('Cronjob setup successful', fg='green'))

    date = datetime.now() - timedelta(days=daysago)
    date = date.strftime("%m-%d-%Y")
    report = generate_report(date)

    if not report:
        click.echo(click.style('No data for this date', fg='red'))
        return

    click.echo(click.style('Report generated successfully', fg='green'))
    click.echo(f'Stored to {report}')

    open_report(report)
Example #9
def main():
    config = ConfigParser.RawConfigParser()
    config.read(sys.argv[1])
    gpath = config.get('input', 'make_ref')
    size_limit = config.getfloat('params', 'max_size')
    files = glob.glob(gpath + "tab/*.txt")
    studies_include = config.get('params', 'studies_include')
    studies_exclude = config.get('params', 'studies_exclude').split(",")
    vartype_f = config.get('params', 'var_type')
    if studies_include == '' or studies_include == None:
        studies_include = []
    else:
        studies_include = studies_include.split(",")
    filtered = []
    start = timeit.default_timer()
    pool = mp.Pool(8)
    files = files[0:20]
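    # NOTE: the slice above caps the run at the first 20 files; this looks like
    # a debugging limit left in the original source.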
    studies = [i.split("/")[-1].rstrip(".txt") for i in files]
    for i in files:
        study = i.split("/")[-1].rstrip(".txt")
        if study in studies_exclude: pass
        else:
            if (len(studies_include) == 0) or (study in studies_include):
                reader = pd.read_csv(i,
                                     sep="\t",
                                     index_col=0,
                                     dtype={'chr': 'S5'})
                pool.apply_async(filter_by_size, [reader, study],
                                 {'max_size': size_limit},
                                 callback=lambda x: filtered.append(x))
            else:
                pass
    # Remove duplicated elements
    ###### Step takes around 7 minutes ###################
    pool.close()
    pool.join()
    df = pd.concat(filtered)
    print(vartype_f)
    stop = timeit.default_timer()
    print('Time to load in files and parse: {0!s}'.format(stop - start))
    p_studies = set(df.study)
    non_passed = []
    for i in studies:
        if i not in p_studies:
            non_passed.append(i)
    print(('Studies that had no variants that '
           'passed size filtering: {0}').format("\t".join(non_passed)))
    ############## HACK for now until we find out what is going on #
    # Get rid of the contigs for now
    df = df.ix[df.contig.isnull(), :]
    # The GRc37 to 38 multiple mapping isn't resolved need to discuss how to
    # deal with this
    df = df.ix[np.logical_not(df.index.duplicated()), :]
    # :TODO if sstart and sstop are the same, no
    # matter if it was originally annotated as inner_start
    # or inner stop it will be collapsed
    # For now since, ignore fuzzy
    dfd = df.drop_duplicates(['chr', 'var_type', 'sstart', 'sstop'],
                             inplace=False)
    new_unique_index = np.arange(dfd.shape[0])
    dfd.loc[:, 'uID'] = new_unique_index
    print('new index created')
    # This step takes forever
    start = timeit.default_timer()
    groups = df.groupby('chr')
    unique_mapping = []
    pool = mp.Pool(8)
    for name, group in groups:
        pool.apply_async(generate_unique_mapping,
                         args=(dfd.ix[dfd.chr == name, :], group),
                         callback=lambda x: unique_mapping.append(x))
        '''
        tgroup = dfd.ix[dfd['chr'] == name,]
        pool.apply_async(generate_unique_mapping_numba,
                args = (group.sstart.values, 
                    group.sstop.values, 
                    tgroup.sstart.values, 
                    tgroup.sstop.values, 
                    tgroup.index.values),
                callback=lambda x: unique_mapping.append(pd.Series(x,
                    index = group.index)))
        '''
    pool.close()
    pool.join()
    ns = pd.concat(unique_mapping)
    stop = timeit.default_timer()
    print('Time to generate mapping: {0!s}'.format(stop - start))
    df['uID'] = ns
    report_dict = {}
    nstudies = config.getint('params', 'nstudies')
    start = timeit.default_timer()
    output = np.zeros(dfd.uID.shape[0], dtype=bool)
    embed()
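    # embed() drops into an interactive shell here (presumably IPython's
    # `from IPython import embed`), which looks like a leftover debugging hook.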
    std_filter = groupby_study_numba(df.uID.values,
                                     df.study,
                                     output,
                                     nstudies=nstudies)
    print(np.sum(std_filter))
    dfd = dfd.ix[std_filter, :]
    df = df.ix[df.uID.isin(dfd.uID), :]
    dfd.to_csv(gpath + 'filtered_no_dupes.txt', sep="\t")
    df.to_csv(gpath + 'study_filtered_all.txt', sep="\t")
    stop = timeit.default_timer()
    print('Time to run: {0!s}'.format(stop - start))
    groups = dfd.groupby('var_type')
    from plot import plot_dists
    generate_report(report_dict)
    rpath = config.get('output', 'report_dir')
    for name, group in groups:
        plot_dists(group.sstop - group.sstart, name, rpath)
    type_count = dfd.groupby('var_type').agg(lambda x: x.shape[0]).loc[:,
                                                                       ['chr']]
    var_percent = type_count.ix[:, 0] / float(dfd.shape[0]) * 100
    type_count['var_percent'] = var_percent
    type_count['var_percent'].round(2)
    report_dict['var_type_pivot'] = type_count.to_html()
    report_dict['studies'] = []
    report_dict['var_types'] = [name for name, _ in groups]
    generate_report(report_dict)
Example #10
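    # Fragment (inferred): files, out_dir, and project_name come from the
    # enclosing script. For each mesh file: fit a plane, export the transformed
    # mesh (stl), slice it along two axes, save the cross-sections (csv) and
    # the analysis object (pkl), then plot and report over all craters.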
    crater_analysis_list = []
    for file in files:
        crater_analysis = DataCraterAnalysis()
        mesh_name = file.stem
        print(f"processing {mesh_name}")
        output_name = mesh_name + ".csv"
        mesh, mesh_points, fit_parameters = plane_fit.prepare_and_fit(
            str(file))
        # Mesh export (stl)
        mesh.export(out_dir.joinpath(f"{mesh_name}_transformed.stl"))
        crater_analysis.project_name = project_name
        crater_analysis.name = mesh_name
        crater_analysis.points = mesh_points
        crater_analysis.fit = fit_parameters
        for plane in [[0, 1, 0], [1, 0, 0]]:
            cs = slice_mesh(mesh, direction=plane)
            crater_analysis.cross_section = pd.concat(
                [crater_analysis.cross_section, cs], axis=1)
            crater_analysis.cross_section.to_csv(
                out_dir.joinpath(f"{mesh_name}_cs.csv"), index=False)
        # DataClass export (pkl)
        pickle.dump(crater_analysis,
                    open(out_dir.joinpath(mesh_name + ".pkl"), "wb"))
        # CS export (csv)
        crater_analysis_list.append(crater_analysis)
        plot_contour(crater_analysis)
    plot_slices(crater_analysis_list)
    generate_report.generate_report(crater_analysis_list)

# data = pd.read_csv('file', header=[0,1])
Example #11
def detect_video(yolo, video_path, output_path=""):
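    # Sketch of the flow (inferred): read frames from the video, run YOLO
    # detection on each one, overlay an FPS counter, optionally write the
    # annotated stream to output_path, log detected labels with timestamps to
    # out.csv, and finally build a report from that log.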
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC),
              type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    time_log = []
    label_log = []

    while True:
        return_value, frame = vid.read()
        if not return_value:
            # no more frames (end of stream or read failure)
            break
        image = Image.fromarray(frame)
        image, label_list = yolo.detect_image(image)
        result = np.asarray(image)
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        label_log.append(label_list)
        time_log.append('Timestamp: {:%Y-%m-%d %H:%M:%S}'.format(
            datetime.datetime.now()))
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(result,
                    text=fps,
                    org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(255, 0, 0),
                    thickness=2)
        cv2.namedWindow("result", cv2.WINDOW_NORMAL)
        cv2.imshow("result", result)
        if isOutput:
            out.write(result)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    time.sleep(1)

    output_df = pd.DataFrame({'label': label_log, 'timestamp': time_log})
    output_df.to_csv('out.csv')
    time.sleep(3)
    generate_report.generate_report()

    yolo.close_session()
Example #12
from generate_sample_data import generate_sample_data
from generate_report import generate_report

if __name__ == '__main__':
    try:
        data = generate_sample_data(1000)
        generate_report(data)
    except Exception as err:
        print('error', err)
    
Example #13
from data_preparation import read_data,remove_outliers,get_dummies,split_data
from train_model import train
from logger import get_logger
from generate_report import generate_report

# Logger
logger = get_logger(__name__)

if __name__ == "__main__":
    
    # Reading data
    df = read_data()

    # Removing outliers
    df = remove_outliers(df)

    # Get Dummies
    df = get_dummies(df)

    # Splitting the data
    X_train, X_test, y_train, y_test = split_data(df)

    # Training the model
    regLinear,score = train(X_train,y_train)

    # Evaluating the model
    generate_report(regLinear,X_test,y_test,score)
Example #14
"""
@author: Filo
"""
from scrappers import Crag
from config import db_file
import sqlite3
import generate_report

if __name__ == "__main__":
    from generate_report import reports_dict
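    # reports_dict (inferred) maps report names to lists of crag ids; the loop
    # below scrapes each crag (and any follow-up jobs) into sqlite before the
    # reports are generated.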

    crags_to_crawl = []
    for crags in reports_dict.values():
        crags_to_crawl += crags

    conn = sqlite3.connect(db_file)
    c = conn.cursor()
    for crag_id in crags_to_crawl:
        scrapper = Crag(id=crag_id)
        new_jobs = scrapper.scrap()
        scrapper.save(c)

        for job in new_jobs:
            job.scrap()
            job.save(c)

    conn.commit()
    conn.close()

    generate_report.generate_report(reports_dict)
Example #15
def main():
    config = ConfigParser.RawConfigParser()
    config.read(sys.argv[1])
    gpath = config.get('input', 'make_ref') 
    size_limit = config.getfloat('params', 'max_size')
    files = glob.glob(gpath + "tab/*.txt")
    studies_include = config.get('params', 'studies_include')
    studies_exclude = config.get('params', 'studies_exclude').split(",")
    vartype_f = config.get('params', 'var_type')
    if studies_include == '' or studies_include == None:
        studies_include = []
    else:
        studies_include = studies_include.split(",")
    filtered = []
    start = timeit.default_timer()
    pool = mp.Pool(8)
    files = files[0:20]
    studies = [i.split("/")[-1].rstrip(".txt") for i in files]
    for i in files:
        study = i.split("/")[-1].rstrip(".txt")
        if study in studies_exclude: pass
        else:
            if (len(studies_include) == 0) or (study in studies_include):
                reader = pd.read_csv(i, sep="\t", 
                        index_col=0, dtype={'chr':'S5'})
                pool.apply_async(filter_by_size, [reader, study],
                        {'max_size': size_limit},
                        callback = lambda x: filtered.append(x))
            else: pass
    # Remove duplicated elements
    ###### Step takes around 7 minutes ###################
    pool.close()
    pool.join()
    df = pd.concat(filtered)
    print(vartype_f)
    stop = timeit.default_timer()
    print('Time to load in files and parse: {0!s}'.format(stop-start))
    p_studies = set(df.study)
    non_passed = []
    for i in studies:
        if i not in p_studies:
            non_passed.append(i)
    print(('Studies that had no variants that '
           'passed size filtering: {0}').format("\t".join(non_passed)))
    ############## HACK for now until we find out what is going on #
    # Get rid of the contigs for now
    df = df.ix[df.contig.isnull(), :]
    # The GRc37 to 38 multiple mapping isn't resolved need to discuss how to 
    # deal with this
    df = df.ix[np.logical_not(df.index.duplicated()),:]
    # :TODO if sstart and sstop are the same, no
    # matter if it was originally annotated as inner_start
    # or inner stop it will be collapsed
    # For now since, ignore fuzzy 
    dfd = df.drop_duplicates(['chr', 'var_type',
        'sstart', 'sstop'], inplace=False)
    new_unique_index = np.arange(dfd.shape[0])
    dfd.loc[:,'uID'] = new_unique_index
    print('new index created')
    # This step takes forever
    start = timeit.default_timer()
    groups = df.groupby('chr')
    unique_mapping = []
    pool = mp.Pool(8)
    for name, group in groups:
        pool.apply_async(generate_unique_mapping,
                args = (dfd.ix[dfd.chr == name,:], group),  
                callback=lambda x: unique_mapping.append(x))
        '''
        tgroup = dfd.ix[dfd['chr'] == name,]
        pool.apply_async(generate_unique_mapping_numba,
                args = (group.sstart.values, 
                    group.sstop.values, 
                    tgroup.sstart.values, 
                    tgroup.sstop.values, 
                    tgroup.index.values),
                callback=lambda x: unique_mapping.append(pd.Series(x,
                    index = group.index)))
        '''
    pool.close()
    pool.join()
    ns = pd.concat(unique_mapping)
    stop = timeit.default_timer()
    print('Time to generate mapping: {0!s}'.format(stop - start))
    df['uID'] = ns
    report_dict = {}
    nstudies = config.getint('params', 'nstudies')
    start = timeit.default_timer()
    output = np.zeros(dfd.uID.shape[0], dtype=bool)
    embed()
    std_filter = groupby_study_numba(df.uID.values, df.study, 
            output, nstudies=nstudies) 
    print(np.sum(std_filter))
    dfd = dfd.ix[std_filter,:]
    df = df.ix[df.uID.isin(dfd.uID),:]
    dfd.to_csv(gpath + 'filtered_no_dupes.txt', sep="\t")
    df.to_csv(gpath + 'study_filtered_all.txt', sep="\t")
    stop = timeit.default_timer()
    print('Time to run: {0!s}'.format(stop - start))
    groups = dfd.groupby('var_type')
    from plot import plot_dists
    generate_report(report_dict)
    rpath = config.get('output', 'report_dir')
    for name, group in groups:
        plot_dists(group.sstop - group.sstart, name,
                rpath)
    type_count = dfd.groupby('var_type').agg(lambda x:
            x.shape[0]).loc[:, ['chr']]
    var_percent = type_count.ix[:,0]/float(dfd.shape[0])*100
    type_count['var_percent'] = var_percent
    type_count['var_percent'].round(2)
    report_dict['var_type_pivot'] = type_count.to_html()
    report_dict['studies'] = []
    report_dict['var_types'] = [name for name, _ in groups]
    generate_report(report_dict)