def getVideosGivenPlayList(playListID: str, topic: str, subtopic: str):
    """Transcribe the videos of a YouTube playlist that are not stored yet.

    Requests the playlist's items from the YouTube Data API, skips every
    video whose ID is already reported by ``mongo.getListVideos(topic)``,
    and runs ``parseVideos`` -> ``splitAudio`` -> ``transcribe_gcs`` for each
    remaining one. All resulting nodes are handed to ``inputData`` and
    returned.

    NOTE: only a single page (at most 50 items) is requested; longer
    playlists would need ``nextPageToken`` pagination.

    Args:
        playListID: ID of the YouTube playlist to read.
        topic: Topic used to look up already-stored videos; also forwarded
            to ``inputData``.
        subtopic: Forwarded to ``inputData`` together with ``topic``.

    Returns:
        The list of ``PreParseNode`` objects built for the videos processed
        in this call (empty when every video was already stored).
    """
    # build youtube client
    mongo.connect()
    youtube = build(
        YOUTUBE_API_SERVICE_NAME,
        YOUTUBE_API_VERSION,
        developerKey=config.YOUTUBE_API_KEY,
    )
    res = youtube.playlistItems().list(
        part="snippet", playlistId=playListID, maxResults="50"
    ).execute()

    # The stored-video lookup does not depend on the loop variable, so fetch
    # it once instead of once per playlist item. list() materializes the
    # result so repeated membership tests see the same contents.
    storedVideoIDs = list(mongo.getListVideos(topic))

    preParsedNodes = []
    for item in res['items']:
        title = item['snippet']['title']
        videoID = item['snippet']['resourceId']['videoId']
        if videoID in storedVideoIDs:
            continue

        youtubeUrl = "https://www.youtube.com/watch?v=" + videoID
        # parse the audio file
        parseVideos([youtubeUrl], videoID)
        # split the audio file
        # (presumably parseVideos wrote "<videoID>.mp3" -- confirm)
        splitAudio(videoID + ".mp3")
        ret = transcribe_gcs()
        preParsedNodes.append(PreParseNode(youtubeUrl, videoID, title, ret))

    inputData(topic, subtopic, preParsedNodes)
    return preParsedNodes
def init_mongo_db(module_name):
    """
    Open the mongo db connection using the properties of the given module.

    The host, user, password and database name are read from rule_manager
    (properties "db_host", "db_user", "db_passwd", "db_name") and passed
    positionally, in that order, to mongodb.connect().
    """
    property_names = ("db_host", "db_user", "db_passwd", "db_name")
    connection_args = [
        rule_manager.get_property(None, module_name, property_name)
        for property_name in property_names
    ]
    mongodb.connect(*connection_args)
def index_visual():
    """Build a sunburst chart of the last stored close price per stock.

    Loads every document of the "historical" collection in the "stock"
    database into a DataFrame, takes the ``close`` value of the last row
    for each ticker listed below, and renders the values as a sunburst
    grouped by industry.

    Tickers that have no rows in the collection are left out of the chart
    instead of aborting the whole figure.

    Returns:
        The figure created by ``px.sunburst`` with the dark layout applied.
    """
    ind_dict = {
        "technology": ["AAPL", "MSFT"],
        "communication services": ["GOOG", "FB"],
        "consumer cyclical": ["AMZN", "WMT"],
        "industrials": ["GE", "MMM"],
        "real estate": ["AMT"],
        "health care": ["JNJ", "PFE"],
        "financial": ["JPM", "V"],
    }

    db_stock = connect('stock')
    collection_stock = db_stock.get_collection("historical")
    data_stock = list(collection_stock.find())
    df_stock = pd.DataFrame.from_records(data_stock)

    # Build the three columns in lockstep so they always stay aligned,
    # even when a ticker has to be skipped.
    industries = []
    stocks = []
    value = []
    for industry, tickers in ind_dict.items():
        for ticker in tickers:
            closes = df_stock.loc[df_stock.code == ticker, 'close']
            if closes.empty:
                # No rows for this ticker: skip it rather than fail.
                continue
            industries.append(industry)
            stocks.append(ticker)
            value.append(closes.iloc[-1])

    data = pd.DataFrame(dict(stock=stocks, industry=industries, close=value))
    fig = px.sunburst(data, path=['industry', 'stock'], values='close')
    fig.update_layout(
        template='plotly_dark',
        title=None,
        plot_bgcolor='#23272c',
        paper_bgcolor='#23272c',
    )
    return fig
def processRq(self, environ, start_response):
    """Serve one request and return the response produced for it.

    A path containing a dot is first offered to ``self.fileResponse``;
    if that returns ``False`` (or the path has no dot) the request is
    passed to ``ClaudiaService.request``. Any exception raised while
    handling is logged with its traceback and answered with a 500
    response.

    ``environ`` / ``start_response`` look like the standard WSGI pair --
    confirm against the caller.
    """
    mongo = connect()
    resp_h = HServResponse(start_response)
    try:
        path, query_args = self.parseRequest(environ)
        file_path = self.checkFilePath(path)
        if '.' in path:
            file_reply = self.fileResponse(resp_h, file_path, True)
            if file_reply is not False:
                return file_reply
        # httpd is not defined in this method; it is taken from the
        # enclosing (module) scope.
        return ClaudiaService.request(resp_h, path, query_args, mongo, httpd)
    except Exception:
        # Capture the formatted traceback as text so it can be logged
        trace_buffer = StringIO()
        traceback.print_exc(file=trace_buffer)
        logging.error("Exception on GET request:\n " + trace_buffer.getvalue())
        return resp_h.makeResponse(error=500)
# Separator elif optstr == "-s": separator = value # Username elif optstr == "-u": username = value if args == []: print 'The script needs at least a file name' print main.__doc__ sys.exit(1) cursor = None try: mongodb.connect(hostname, username, passwd, dbname) except Exception, e: print 'Unable to connect with the database' print str(e) sys.exit(1) # slurp anonymization information if anonymize_file != None: anonymize.load_data(anonymize_file) # Loop over all the given data files skip_count = 0 line_number = 0 param_list = [] for file_name in args: if not os.path.isfile(file_name):
import mongodb

# Connect with no arguments, i.e. using whatever defaults mongodb.connect()
# defines (presumably a project-local helper -- confirm).
database = mongodb.connect()
# Look up the "posts" entry on the returned object; presumably this is the
# posts collection handle used by the rest of the module.
collection = database["posts"]
if field.isspace(): continue if key == '': key = field else: dict[key] = field if key.find(formula) != -1: h = False for stat in apriory: if key.find(stat) != -1: chf_ids[stat].append(number_of_card) break key = '' if h: print(number_of_card) # Record dictionary to JSON-file put("doc.json", dict, number_of_card=number_of_card, mongo=mongo) # Record of id of cards f.close() ids.sort() #print(str(len(chf_ids))+ ' documents apriory have diagnosis CHF.') put('results_apriory', chf_ids, formula=formula, mongo=mongo) if __name__ == '__main__': mongo = connect() csv_to_json(mongo) print('OK.')
import pandas as pd import plotly.graph_objects as go import numpy as np from plotly.subplots import make_subplots from mongodb import connect import requests import joblib from bs4 import BeautifulSoup # df = pd.read_csv('https://plotly.github.io/datasets/country_indicators.csv') # available_indicators = df['Indicator Name'].unique() # model # covid data db_covid = connect("covid") collection_covid = db_covid.get_collection("cases") data_covid = list(collection_covid.find()) df_covid = pd.DataFrame.from_records(data_covid).iloc[::-1] df_covid = df_covid.reset_index().drop(['index','_id'], axis=1) # stock data db_stock = connect('stock') collection_stock = db_stock.get_collection("historical") data_stock = list(collection_stock.find()) df_stock = pd.DataFrame.from_records(data_stock) codes = df_stock.code.unique() COLORS = ['rgb(637,657,687)', 'rgb(80,80,80)', 'rgb(100,100,100)', 'rgb(115,115,115)', 'rgb(135,67,69)', 'rgb(189,189,189)', 'rgb(67,80,100)', 'rgb(123,33,67)', 'rgb(138,45,69)', 'rgb(167,167,167)',