Ejemplo n.º 1
0
# Command-line handling. Forms accepted by this script:
#   <script>                -> trump_video, TESTING off
#   <script> <url>          -> that URL if validators.url accepts it, else trump_video
#   <script> log            -> trump_video, TESTING on
#   <script> <url> <flag>   -> that URL as given; TESTING on only when <flag> is 'log'
TESTING = False
# Fall back to trump_video when no argument is supplied: indexing sys.argv[1]
# unconditionally raised IndexError when the script was run without arguments.
url = sys.argv[1] if len(sys.argv) > 1 else trump_video
if len(sys.argv) == 3:
    if sys.argv[2] == 'log':
        TESTING = True
    # NOTE(review): in this form the first argument is used as the URL whether
    # or not it validates (the previous validators.url check here only
    # reassigned the same value). Confirm whether an invalid URL should fall
    # back to trump_video like the one-argument form does.
elif len(sys.argv) == 2:
    # A single argument is either a URL or the 'log' flag; anything that is
    # not a valid URL leaves the default video in place.
    url = trump_video
    if validators.url(sys.argv[1]):
        url = sys.argv[1]
    if sys.argv[1] == 'log':
        TESTING = True

print("IS TEST:", TESTING)
engine = getEngine(TESTING)

print("Link: ", url)
# Parameters handed to videoObject; units are not visible here
# (presumably seconds -- TODO confirm against videoHelper).
window_size = 4
overlap = 2
column_name = 'word'
video = videoHelper.videoObject(url, window_size, overlap, TESTING)

# Check whether this URL already has data in the target column. The result
# only triggers a notice; processing continues either way.
url_exists = columnForURLFilled(url, engine, column_name)
# Kept as an explicit comparison because the return type of
# columnForURLFilled is not visible from this script.
if url_exists == True:
    print("This video is already in the database. Will overwrite ",
          column_name, " data.")
video.getAudio()
print('Getting text analysis...')
video.getTextAnalysis()
Ejemplo n.º 2
0
from lib.etl.sqlConnection import sqlConnectionSetup, urlExists, columnForURLFilled, getEngine, getTableAsDf
from scipy.signal import find_peaks
import plotly.graph_objects as go
from scipy.signal import argrelextrema
import numpy as np

# Launch the unit tests before doing anything else. The exit status of the
# command is not inspected.
os.system('python test.py -v')

# Test mode is switched on only by a single 'log' command-line argument.
IS_TEST = len(sys.argv) == 2 and sys.argv[1] == 'log'

print("IS TEST:", IS_TEST)
engine = getEngine(IS_TEST)

print('Getting labelled DF ...')
# Load the "labelled" table as a pandas DataFrame.
labelled_df = getTableAsDf("labelled", engine)
# Copy of the table with the 'start' column renamed to 'start_time_s'.
clean_labelled_df = labelled_df.rename(columns={'start': 'start_time_s'})
# Select the rows that have an amplitude but no is_amplitude_peak value yet,
# then collect the distinct URLs those rows belong to.
_has_amplitude = labelled_df['amplitude'].notnull()
_peak_untagged = labelled_df['is_amplitude_peak'].isnull()
filtered_labelledDF = labelled_df[_has_amplitude & _peak_untagged]
url_list = filtered_labelledDF['url'].unique()
if len(url_list) == 0:
    print('All videos already have amplitude peak tags!')
else:
    print('Going through videos with is_amplitude_peak tag ...')
    column_name = 'is_amplitude_peak'
    for url in tqdm(url_list):
Ejemplo n.º 3
0
from sqlalchemy.orm import sessionmaker
import lib.etl.sqlConnection as sqlConnection
from sklearn.cluster import KMeans
from scipy.spatial import Voronoi, voronoi_plot_2d, cKDTree
import matplotlib.pyplot as plt
import numpy as np
import math

# Database handles created once, when this module is imported.
# NOTE(review): the meaning of the False flag is defined in
# lib.etl.sqlConnection -- presumably "not test mode"; confirm there.
engine = sqlConnection.getEngine(False)
# Session factory bound to the engine above, plus one session made from it.
Session = sessionmaker(bind=engine)
session = Session()


def customKMeans(table_name, k_value, column_list, prediction_observation):
    kMeansDf = sqlConnection.getTableAsDf(table_name)
    kMeansDf_formatted = kMeansDf[column_list]

    kmeans = KMeans(n_clusters=k_value, random_state=0).fit(kMeansDf_formatted)

    # Take all the cluster centers and find the 'highlight' cluster using a heuristic
    # Assumption: the further a cluster's centroid is from the origin o(i, j ... n),
    # the more likely the moment is a highlight.

    max_d_from_origin = 0
    # highlight centroid is the list of coordinates of the centre of the voronoi cell in our ...
    # k-means hyperplane that
    highlight_centroid = kmeans.cluster_centers_[0]

    for center in kmeans.cluster_centers_:
        total_dst = 0
        for i in range(len(center)):