Code Example #1
def get_movement_sizes(bucket_name, target_cat, l):
    """Count the images per movement under target_cat in the bucket.

    `l` is the expected total image count, used only to size the progress bar.
    """
    total_image_count = 0  # progress bar bookkeeping
    print("Calculating movement sizes...")
    print_progress_bar(total_image_count, l, prefix='Progress:')

    movement_size_dict = {}  # maps movement name -> image count
    movement_count = 0  # tentative count of images in the current movement
    prev_movement = ""  # movement associated with the previous file

    # Get all file paths in the GCP bucket and loop through them.
    blobs = storage.Client().list_blobs(bucket_name)
    for blob in blobs:
        category, movement_name = get_category_and_movement(blob.name)
        if category == target_cat:  # only count images in the target category
            if movement_name != prev_movement:  # a new movement has started,
                if prev_movement != "":  # so if one was just completed,
                    movement_size_dict[prev_movement] = movement_count  # record its size
                prev_movement = movement_name  # track the current movement
                movement_count = 0  # and reset its image count

            movement_count += 1  # one more image in the current movement

            total_image_count += 1  # progress bar bookkeeping
            print_progress_bar(total_image_count, l, prefix='Progress:')

    if prev_movement != "":  # record the last movement (skipped if nothing matched)
        movement_size_dict[prev_movement] = movement_count

    return movement_size_dict
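
Every example in this collection calls print_progress_bar(), which is not defined here. A minimal sketch consistent with the call sites (prefix, suffix, and length keyword arguments; a final call at iteration == total finishes the bar) might look like the following; the fill character is an arbitrary choice:

def print_progress_bar(iteration, total, prefix='', suffix='', length=50, fill='#'):
    """Render a single-line terminal progress bar; call once per update."""
    percent = 100 * (iteration / float(total))
    filled_length = int(length * iteration // total)
    bar = fill * filled_length + '-' * (length - filled_length)
    print('\r{} |{}| {:.1f}% {}'.format(prefix, bar, percent, suffix), end='')
    if iteration == total:
        print()  # move to a new line once the bar completes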
Code Example #2
def vis_focused_grid(kernels):
    res = 1000
    lats_lngs = []
    lats_lngs.append(np.mgrid[0.555:0.612:res * 1j, -1.265:-1.248:res * 1j])
    lats_lngs.append(np.mgrid[-0.565:-0.46:res * 1j, 1.46:1.6:res * 1j])
    lats_lngs.append(np.mgrid[-0.5:0.2:res * 1j, -0.85:-0.7:res * 1j])
    lats_lngs.append(np.mgrid[0:0.17:res * 1j, -0.78:-0.72:res * 1j])
    lats_lngs.append(np.mgrid[-0.65:-0.4:res * 1j, -0.6:-0.15:res * 1j])
    lats_lngs.append(np.mgrid[0.445:0.695:res * 1j, -1.2845:-1.2305:res * 1j])

    # NOTE: the [0:1] slice renders only the first window; iterate over
    # lats_lngs directly to render all six.
    for j, (lat, lng) in enumerate(lats_lngs[0:1]):
        pos = np.dstack((lng, lat))
        logger.info("%sx%s Grid created.", res, res)

        heatmap = np.zeros((res, res))
        T = len(kernels)
        percent = T // 100 if T >= 100 else 1  # avoid a zero modulus when T < 100
        for i, k in enumerate(kernels):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1,
                                   T,
                                   prefix='Progress:',
                                   suffix='Complete',
                                   length=50)
            np.add(heatmap, k.pdf(pos), heatmap)
        logger.info("Probabilities for grid calculated.")

        hlp.save_array(heatmap,
                       "combined_gp_heat_focused/{}_{}x{}".format(j, res,
                                                                  res), logger)
        plot_heatmap(heatmap,
                     identifier="_focused/{}_{}x{}".format(j, res, res),
                     show_title=False,
                     with_alpha=True)
Code Example #3
def run(path):
    logger.info("Starting execution of file_lengths.py!")
    event_counts = []
    file_sizes = []
    files = [f for f in os.listdir(path) if ".log" in f]
    T = len(files)
    percent = T // 100 if T >= 100 else 1
    for i, file_name in enumerate(files):
        if (i + 1) % percent == 0 or (i + 1) == T:
            hlp.print_progress_bar(i + 1, T, length=50)
        event_counts.append(hlp.file_len(path + file_name))
        file_sizes.append(os.path.getsize(path + file_name) / 1e9)
    logger.info("Total Size: %sGB", sum(file_sizes))

    fig = plt.figure()
    plt.subplot(121)
    plt.boxplot(event_counts, showmeans=True)
    plt.title("Number of Events")

    plt.subplot(122)
    plt.boxplot(file_sizes, showmeans=True)
    plt.title("Data Log Sizes (GB)")

    fig.suptitle("Measurements on {} Logs".format(len(event_counts)))
    fig.subplots_adjust(hspace=0.3)  # only the vertical spacing needs adjusting
    plt.savefig("log_sizes.png")
Code Example #4
def read_events_from_file_old(file_name,
                              vehicle_id=None,
                              max_events=None,
                              create_timeline=False,
                              filter_vehicle="Bus",
                              group_by_id=True):
    """Functions that takes a file_name and returns events in the form of key-value objects.

    Parameter "vehicle_id" is an optional parameter that is used to filter 
    results to only contain events for the given vehicle_id.

    Returns a list of objects containing data in a key-value format.
    """
    if group_by_id:
        events = defaultdict(dict)
    else:
        events = defaultdict(list)
    timeline_events = []
    if max_events is None:
        logger.info("Calculating number of events in file...")
        T = file_len(file_name)
        logger.info("File has %i events", T)
    else:
        T = max_events
    percent = T // 100 if T >= 100 else 1  # avoid a zero modulus when T < 100
    with open(file_name, 'r', encoding="latin-1") as f:
        for i in range(T):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1,
                                   T,
                                   prefix='Progress:',
                                   suffix='Complete',
                                   length=50)

            event = parse_event(f.readline(), filter_vehicle)
            if event is None:
                continue

            event_type = event["event.type"]
            event_v_id = event["vehicle.id"]

            if vehicle_id is None or vehicle_id == event_v_id:
                if group_by_id:
                    if event_type not in events[event_v_id]:
                        events[event_v_id][event_type] = []
                    events[event_v_id][event_type].append(event)
                else:
                    events[event_type].append(event)
                if create_timeline:
                    if not timeline_events or timeline_events[-1]["event.type"] != event_type:
                        timeline_events.append(event)
    return events, timeline_events
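
A hypothetical call, for illustration only (the file name and vehicle id below are made up):

# Hypothetical usage: events for one vehicle, grouped by event type,
# plus a timeline of event-type transitions.
events, timeline = read_events_from_file_old("bus_events.log",
                                             vehicle_id="1337",
                                             create_timeline=True)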
Code Example #5
def mass_dm_followers(
    username: str,
    message: str,
    rank_by: str = "recent",
    value: str = "",
    dry_run: bool = True,
    api: tweepy.API = None,
):
    """
    Send a mass DM to all followers in order of the specified ranking and set
    the dm_sent flag for each messaged user to True. Supports a dry run where
    messages are not actually sent and the dm_sent flag is not changed.

    params:
        username(str) - user whose followers to DM
        message(str) - message to send out
        rank_by(str) - ranking method
        value(str) - value to search for; only used for the location
                     and description filters
        dry_run(bool) - set to True to only pretend to send messages
        api(tweepy.API) - tweepy api instance
    """
    user = User.query.filter_by(username=username).first()
    try:
        followers = ranked_followers(username, rank_by, value)
    except Exception as e:
        print(e)
        bye()
    total_followers = len(followers)
    if not total_followers:
        print("No followers matched your criteria :(")
        bye()
    print()
    if dry_run:
        print(
            "Dry run is ON. Messages are not actually being sent. Phew. Add the --real flag to send DMs"
        )
    print("Sending message to {} followers".format(total_followers),
          end="\n\n")
    for i, follower in enumerate(followers):
        print("\033[F\033[KSending DM to {}".format(follower.screen_name))
        print_progress_bar(i + 1, total_followers, suffix="Sent")
        if dry_run:
            time.sleep(0.01)
        else:
            send_message(follower.id_str, message,
                         api)  # Comment this out if testing
            db.session.query(Follower).filter_by(id_str=follower.id_str,
                                                 user_id=user.id).update(
                                                     {"dm_sent": True})
            db.session.commit()
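
A hypothetical dry run, for illustration only (the handle, message, and value are made up; `api` is a configured tweepy.API instance created elsewhere):

# Hypothetical usage: rank followers by location match; send nothing yet.
mass_dm_followers("my_handle",
                  "Thanks for following!",
                  rank_by="location",
                  value="Berlin",
                  dry_run=True,
                  api=api)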
Code Example #6
def write_csv_file(filename, category, pct_train, pct_valid):

    print("Retreiving blobs from GCP...")
    blobs = storage.Client().list_blobs(
        "art-translated-rvf")  # get all filepaths in the GCP bucket/folder

    if category == 'fakey':
        num_images_in_category = 26884  # images in the 'fakey' category
    else:
        num_images_in_category = 34485  # images in the other category

    num_train = int((pct_train / 100) * num_images_in_category)  # convert percentages
    num_valid = int((pct_valid / 100) * num_images_in_category)  # to integer counts
    img_count = 0  # stores the tentative count of images in the current category

    print("\nWriting " + category + " images to .csv file...")  #
    print_progress_bar(0,
                       num_images_in_category,
                       prefix='Progress:',
                       suffix='lines written')

    for blob in blobs:  # loop through every file path
        if get_category(blob.name) == category:
            # .csv column 0 -- dataset: assign images to TRAIN, UNASSIGNED
            # (validation), or TEST according to the percentages set above.
            if img_count < num_train:
                dataset = "TRAIN"
            elif img_count < num_train + num_valid:
                dataset = "UNASSIGNED"
            else:
                dataset = "TEST"
            directory = "gs://art-translated-rvf/" + blob.name  # .csv column 1 -- GCS path
            label = category  # .csv column 2 -- AutoML classification label

            csv_list = [dataset, directory, label]  # values for the next line of the .csv file

            write_csv_line(filename, csv_list)  # write them as a new line of the file

            img_count += 1  # update image count and print the updated progress bar
            print_progress_bar(img_count,
                               num_images_in_category,
                               prefix='Progress:',
                               suffix='lines written')

    print("\nLook for", filename, "in the parent directory.")
Code Example #7
def read_events_from_file(file_name,
                          skip_n=0,
                          max_events=None,
                          geofence=None,
                          vehicle_id=None):
    """Opens a file containing events and parses them.
    Checks if a journey has begun and saves all the position updates from the bus on that journey.
    Bus stops stopped at or passed are also recorded.
    """
    if max_events is None:
        logger.info("Calculating number of events in file...")
        T = file_len(file_name)
        logger.info("File has %i events", T)
        T -= skip_n
    else:
        T = max_events
    percent = T // 100 if T >= 100 else 1  # avoid a zero modulus when T < 100

    if vehicle_id is not None and not isinstance(vehicle_id, list):
        vehicle_id = [vehicle_id]

    events = defaultdict(list)

    with open(file_name, 'r', encoding="latin-1") as f:
        logger.info("Skipping %s events", skip_n)
        for _ in range(skip_n):
            f.readline()
        for i in range(T):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1,
                                   T,
                                   prefix='Progress:',
                                   suffix='Complete',
                                   length=50)
            event = parse_event(f.readline())

            if event is None:
                continue
            if vehicle_id is not None and event["vehicle.id"] not in vehicle_id:
                continue
            if geofence is not None and not is_inside(event, *geofence):
                continue

            assert isinstance(event["date"], datetime.datetime)
            events[event["vehicle.id"]].append(event)

    for v in events.values():
        v.sort(key=lambda e: e["event.id"])
    return events
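
A hypothetical call, for illustration only (the file name and vehicle ids are made up; the geofence argument is left out because is_inside's expected tuple shape is not shown here):

# Hypothetical usage: skip the first 1000 events and keep two vehicles.
events = read_events_from_file("bus_events.log",
                               skip_n=1000,
                               vehicle_id=["1337", "1338"])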
Code Example #8
def write_csv_file(filename, l, pct_train, pct_valid):

    write_csv_header(filename)  # write the header of the .csv file

    # Get the number of images in each movement.
    movement_size_dict = get_movement_sizes("art-translated-rvf", 'fakey', l)
    movement_count = 0  # tentative count of images in the current movement
    prev_movement = ""  # movement associated with the previous file
    img_count = 0

    blobs = storage.Client().list_blobs("art-translated-rvf")  # all file paths in the GCP bucket

    print("\nWriting images to .csv file...")
    print_progress_bar(0, l, prefix='Progress:', suffix='lines written')

    for blob in blobs:  # loop through every file path
        category, movement_name = get_category_and_movement(blob.name)  # retrieve category and movement

        if category == 'fakey':
            if movement_name != prev_movement:  # a new movement has started,
                prev_movement = movement_name  # so track the current one
                movement_count = 0  # and reset its image count

            num_images = movement_size_dict[movement_name]  # images in the current movement
            num_train = int((pct_train / 100) * num_images)  # convert percentages
            num_valid = int((pct_valid / 100) * num_images)  # to integer counts

            # .csv column 0 -- dataset: assign images to TRAIN, UNASSIGNED
            # (validation), or TEST according to the percentages set above.
            if movement_count < num_train:
                dataset = "TRAIN"
            elif movement_count < num_train + num_valid:
                dataset = "UNASSIGNED"
            else:
                dataset = "TEST"
            directory = "gs://art-translated-rvf/" + blob.name  # .csv column 1 -- GCS path
            label = movement_name  # .csv column 2 -- AutoML classification label

            csv_list = [dataset, directory, label]  # values for the next line of the .csv file
            movement_count += 1  # one more image in the current movement

            write_csv_line(filename, csv_list)  # write them as a new line of the file

            img_count += 1  # update the overall count and refresh the progress bar
            print_progress_bar(img_count, l, prefix='Progress:', suffix='lines written')

    print("\nLook for", filename, "in the parent directory.")
Code Example #9
def vis_whole_grid(kernels):
    res = 7500
    lat, lng = np.mgrid[-1.7:2:res * 1j, -1.35:1.65:res * 1j]
    pos = np.dstack((lng, lat))
    logger.info("%sx%s Grid created.", res, res)

    heatmap = np.zeros((res, res))
    T = len(kernels)
    percent = T // 100 if T >= 100 else 1  # avoid a zero modulus when T < 100
    for i, k in enumerate(kernels):
        if (i + 1) % percent == 0 or (i + 1) == T:
            print_progress_bar(i + 1,
                               T,
                               prefix='Progress:',
                               suffix='Complete',
                               length=50)
        np.add(heatmap, k.pdf(pos), heatmap)
    logger.info("Probabilities for grid calculated.")

    hlp.save_array(heatmap, "heatmap_{}x{}".format(res, res), logger)
    plot_heatmap(heatmap)
Code Example #10
def get_event_types_list(file_name, max_events=None):
    if max_events is None:
        logger.info("Calculating number of events in file...")
        T = file_len(file_name)
        logger.info("File has %i events", T)
    else:
        T = max_events
    percent = T // 100 if T >= 100 else 1  # avoid a zero modulus when T < 100
    types = []
    with open(file_name, 'r', encoding="latin-1") as f:
        for i in range(T):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1,
                                   T,
                                   prefix='Progress:',
                                   suffix='Complete',
                                   length=50)

            event_type = get_event_type(f.readline())
            types.append(event_type)
    return types
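
Since the function returns one entry per line read, a frequency table is the natural next step. A hypothetical follow-up (the file name is made up):

from collections import Counter

# Hypothetical usage: tally how often each event type occurs in a log.
types = get_event_types_list("bus_events.log", max_events=100000)
print(Counter(types).most_common(10))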
Code Example #11
def fetch_followers(username: str, api: tweepy.API):
    """
    Use tweepy to fetch user's followers' ids and then fetch their user objects
    and save to the db.

    params:
        username(str) - username of user to fetch followers for
        api(tweepy.API) - tweepy api instance
    """
    total_followers = api.me().followers_count
    print("Fetching {} followers".format(total_followers))
    db.create_all()
    follower_ids = []
    print("Fetching follower ids!")
    for follower_id in rate_limit_handler(  # avoid shadowing the builtin `id`
            tweepy.Cursor(api.followers_ids, count=5000).items()):
        follower_ids.append(follower_id)
    print("Fetching user objects from ids!")
    fetched = 0  # running count; the chunk-local enumerate index resets every 100 users
    for chunk in divide_into_chunks(follower_ids, 100):
        for follower in api.lookup_users(user_ids=chunk):
            follower_dict = dict(
                (k, follower.__dict__[k]) for k in follower_keys)
            user = User.query.filter_by(username=username).first()
            if not user:
                user = User(username=username)
            follower = Follower(**follower_dict)
            user.followers.append(follower)
            db.session.add(user)
            db.session.commit()
            fetched += 1  # fixes the progress bar restarting at each 100-user chunk
            print_progress_bar(
                fetched,
                total_followers,
                prefix="Fetching {}/{} Followers".format(fetched, total_followers),
                suffix="Fetched",
            )
    print("Done!")