# ---------------------------------------------------------------------------
# Command-line handling
#
#   <script>               -> default video (trump_video), normal mode
#   <script> <url>         -> given video, normal mode
#   <script> log           -> default video (trump_video), test mode
#   <script> <url> log     -> given video, test mode
#
# With exactly one argument that is neither a valid URL nor 'log', the
# default video is used.
# ---------------------------------------------------------------------------
TESTING = False
cli_args = sys.argv[1:]

if not cli_args:
    # No arguments: reading sys.argv[1] here used to raise IndexError.
    # Fall back to the default video instead of crashing.
    url = trump_video
elif len(cli_args) == 1:
    only_arg = cli_args[0]
    # The single argument is either the video URL or the 'log' switch.
    url = only_arg if validators.url(only_arg) else trump_video
    TESTING = only_arg == 'log'
else:
    # Two or more arguments: the first one is always taken as the URL
    # (it is not validated here), and 'log' is only honoured as the second
    # of exactly two arguments.
    url = cli_args[0]
    TESTING = len(cli_args) == 2 and cli_args[1] == 'log'

print("IS TEST:", TESTING)
engine = getEngine(TESTING)
print("Link: ", url)

# Parameters handed to videoObject together with the URL and the test flag.
# NOTE(review): units of window_size / overlap are not visible from here
# (presumably seconds) -- confirm against videoHelper.
window_size = 4
overlap = 2
column_name = 'word'
video = videoHelper.videoObject(url, window_size, overlap, TESTING)

# Check if video already exists
url_exists = columnForURLFilled(url, engine, column_name)
# The explicit comparison is kept because the return type of
# columnForURLFilled is not visible from this script.
if url_exists == True:
    print("This video is already in the database. Will overwrite ", column_name, " data.")

video.getAudio()
print('Getting text analysis...')
video.getTextAnalysis()
from lib.etl.sqlConnection import sqlConnectionSetup, urlExists, columnForURLFilled, getEngine, getTableAsDf from scipy.signal import find_peaks import plotly.graph_objects as go from scipy.signal import argrelextrema import numpy as np # Run unit tests first os.system('python test.py -v') IS_TEST = False if len(sys.argv) == 2: if sys.argv[1] == 'log': IS_TEST = True print("IS TEST:", IS_TEST) engine = getEngine(IS_TEST) print('Getting labelled DF ...') # Getting the labelled table as pandas df. labelled_df = getTableAsDf("labelled", engine) # Hackedy hack ... clean_labelled_df = labelled_df.rename(columns={'start': 'start_time_s'}) # Get the list of urls for which amplitude is NOT NULL and is_amplitude_peak is NULL. filtered_labelledDF = labelled_df[(labelled_df['amplitude'].notnull()) & (labelled_df['is_amplitude_peak'].isnull())] url_list = filtered_labelledDF.url.unique() if len(url_list) == 0: print('All videos already have amplitude peak tags!') else: print('Going through videos with is_amplitude_peak tag ...') column_name = 'is_amplitude_peak' for url in tqdm(url_list):
from sqlalchemy.orm import sessionmaker import lib.etl.sqlConnection as sqlConnection from sklearn.cluster import KMeans from scipy.spatial import Voronoi, voronoi_plot_2d, cKDTree import matplotlib.pyplot as plt import numpy as np import math engine = sqlConnection.getEngine(False) Session = sessionmaker(bind=engine) session = Session() def customKMeans(table_name, k_value, column_list, prediction_observation): kMeansDf = sqlConnection.getTableAsDf(table_name) kMeansDf_formatted = kMeansDf[column_list] kmeans = KMeans(n_clusters=k_value, random_state=0).fit(kMeansDf_formatted) # Take all the cluster centers and find the 'highlight' cluster using a heuristic # Assumption: The further the centroid of a cluster is from the origin o(i, j ... n) the more ... # highlight likely the moment is. max_d_from_origin = 0 # highlight centroid is the list of coordinates of the centre of the voronoi cell in our ... # k-means hyperplane that highlight_centroid = kmeans.cluster_centers_[0] for center in kmeans.cluster_centers_: total_dst = 0 for i in range(len(center)):