Exemple #1
0
def getVideosGivenPlayList(playListID: str, topic: str, subtopic: str):
    """Transcribe the videos of a YouTube playlist that are not stored yet.

    A single ``playlistItems().list`` request is issued (``maxResults="50"``,
    no page token is followed), so only the items of that first response
    are processed.  Every item whose video id is not reported by
    ``mongo.getListVideos(topic)`` is handed to ``parseVideos``,
    ``splitAudio`` and ``transcribe_gcs`` and wrapped in a ``PreParseNode``.

    Args:
        playListID: Id of the YouTube playlist to read.
        topic: Topic used for the "already stored" lookup and passed to
            ``inputData``.
        subtopic: Subtopic passed to ``inputData``.

    Returns:
        The list of ``PreParseNode`` objects created during this call.
    """
    mongo.connect()

    # Build the YouTube client and fetch the playlist entries.
    client = build(YOUTUBE_API_SERVICE_NAME,
                   YOUTUBE_API_VERSION,
                   developerKey=config.YOUTUBE_API_KEY)
    request = client.playlistItems().list(part="snippet",
                                          playlistId=playListID,
                                          maxResults="50")
    response = request.execute()

    nodes = []
    for entry in response['items']:
        snippet = entry['snippet']
        title = snippet['title']
        video_id = snippet['resourceId']['videoId']

        # The stored-video list is re-read on every iteration, so it reflects
        # whatever inputData() persisted earlier in this same loop.
        if video_id in mongo.getListVideos(topic):
            continue

        watch_url = "https://www.youtube.com/watch?v=" + video_id

        # Produce the audio file for this video, then split it.
        parseVideos([watch_url], video_id)
        splitAudio(video_id + ".mp3")

        # transcribe_gcs() takes no arguments here; presumably it picks up
        # the pieces written by splitAudio() -- TODO confirm.
        transcript = transcribe_gcs()

        nodes.append(PreParseNode(watch_url, video_id, title, transcript))

        # NOTE(review): called once per processed video with the whole
        # cumulative list, not just the new node -- confirm inputData()
        # tolerates receiving earlier nodes again.
        inputData(topic, subtopic, nodes)

    return nodes
Exemple #2
0
def init_mongo_db(module_name):
    """
    Open the mongodb connection with the settings stored for a module.

    The properties "db_host", "db_user", "db_passwd" and "db_name" are read
    through rule_manager.get_property for the given module_name and passed
    positionally, in that order, to mongodb.connect.
    """

    setting_keys = ("db_host", "db_user", "db_passwd", "db_name")
    settings = [
        rule_manager.get_property(None, module_name, setting_key)
        for setting_key in setting_keys
    ]
    mongodb.connect(*settings)
Exemple #3
0
def index_visual():
    """Build a sunburst figure of the latest close price per stock.

    All documents of the "historical" collection in the "stock" database
    are loaded into a DataFrame.  For every ticker in the hard-coded
    industry map, the ``close`` value of its last row is taken and the
    result is drawn as an industry -> stock sunburst sized by that value.

    Tickers that have no rows in the collection are left out of the chart.
    (Previously such a ticker made ``list.append`` receive zero arguments
    and the whole function failed with a ``TypeError``.)

    Returns:
        The plotly figure produced by ``px.sunburst`` with the dark layout
        applied.
    """
    ind_dict = {"technology": ["AAPL", "MSFT"],
                "communication services": ["GOOG", "FB"],
                "consumer cyclical": ["AMZN", "WMT"],
                "industrials": ["GE", "MMM"],
                "real estate": ["AMT"],
                "health care": ["JNJ", "PFE"],
                "financial": ["JPM", "V"]}

    db_stock = connect('stock')
    collection_stock = db_stock.get_collection("historical")
    df_stock = pd.DataFrame.from_records(list(collection_stock.find()))

    # One row per (industry, stock) pair, in the order of the map above.
    rows = []
    for industry, tickers in ind_dict.items():
        for ticker in tickers:
            closes = df_stock.loc[df_stock.code == ticker, 'close']
            if closes.empty:
                # No history stored for this ticker: skip it instead of
                # failing the whole chart.
                continue
            # Last row in collection order, same value as tail(1).
            rows.append({'stock': ticker,
                         'industry': industry,
                         'close': closes.iloc[-1]})

    # Explicit columns keep the frame well-formed even when rows is empty.
    data = pd.DataFrame(rows, columns=['stock', 'industry', 'close'])

    fig = px.sunburst(data, path=['industry', 'stock'], values='close')
    fig.update_layout(template='plotly_dark',
                      title=None,
                      plot_bgcolor='#23272c',
                      paper_bgcolor='#23272c')
    return fig
Exemple #4
0
 def processRq(self, environ, start_response):
     """Handle one request and return the response body.

     The request is parsed with ``self.parseRequest`` and the path is run
     through ``self.checkFilePath``.  A path containing a dot is first
     offered to ``self.fileResponse``; any result other than ``False`` is
     returned as the response.  Everything else is forwarded to
     ``ClaudiaService.request``.

     Any exception raised while handling is logged together with its
     traceback and answered through ``makeResponse(error = 500)``.
     """
     mongo = connect()
     resp_h = HServResponse(start_response)
     try:
         path, query_args = self.parseRequest(environ)
         file_path = self.checkFilePath(path)
         if '.' in path:
             file_reply = self.fileResponse(resp_h, file_path, True)
             # False means "not served as a file": fall through to the
             # service call below.
             if file_reply is not False:
                 return file_reply
         return ClaudiaService.request(resp_h, path, query_args,  mongo, httpd)
     except Exception:
         # format_exc() yields the same text print_exc() would write.
         logging.error(
             "Exception on GET request:\n " + traceback.format_exc())
         return resp_h.makeResponse(error = 500)
        # Separator
        elif optstr == "-s":
            separator = value

        # Username
        elif optstr == "-u":
            username = value

    if args == []:
        print 'The script needs at least a file name'
        print main.__doc__
        sys.exit(1)

    cursor = None
    try:
        mongodb.connect(hostname, username, passwd, dbname)
    except Exception, e:
        print 'Unable to connect with the database'
        print str(e)
        sys.exit(1)

    # slurp anonymization information
    if anonymize_file != None:
        anonymize.load_data(anonymize_file)

    # Loop over all the given data files
    skip_count = 0
    line_number = 0
    param_list = []
    for file_name in args:
        if not os.path.isfile(file_name):
Exemple #6
0
import mongodb

# Module-level database handle, obtained from the mongodb helper module
# by calling connect() with no arguments.
database = mongodb.connect()

# Module-level handle for the "posts" entry, looked up by subscript on the
# database object (presumably a collection -- confirm against mongodb.connect).
collection = database["posts"]
Exemple #7
0
                        if field.isspace(): continue
                        if key == '':
                            key = field
                        else:
                            dict[key] = field
                            if key.find(formula) != -1:
                                h = False
                                for stat in apriory:
                                    if key.find(stat) != -1:
                                        chf_ids[stat].append(number_of_card)
                                        break
                            key = ''
                    if h:
                        print(number_of_card)
                    # Record dictionary to JSON-file
                    put("doc.json",
                        dict,
                        number_of_card=number_of_card,
                        mongo=mongo)
            # Record of id of cards
            f.close()
            ids.sort()
            #print(str(len(chf_ids))+ ' documents apriory have diagnosis CHF.')
            put('results_apriory', chf_ids, formula=formula, mongo=mongo)


# Script entry point: runs only when the file is executed directly.
# Opens a connection with connect(), passes it to csv_to_json(), and prints
# 'OK.' once that call has returned.
if __name__ == '__main__':
    mongo = connect()
    csv_to_json(mongo)
    print('OK.')
Exemple #8
0
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from plotly.subplots import make_subplots
from mongodb import connect
import requests
import joblib
from bs4 import BeautifulSoup

# df = pd.read_csv('https://plotly.github.io/datasets/country_indicators.csv')

# available_indicators = df['Indicator Name'].unique()
# model

# covid data
# Runs at import time: reads the "cases" collection of the "covid" database
# (find() is called without a filter) into a DataFrame.
db_covid = connect("covid")
collection_covid = db_covid.get_collection("cases")
data_covid = list(collection_covid.find())
# iloc[::-1] reverses the row order of the loaded records.
df_covid = pd.DataFrame.from_records(data_covid).iloc[::-1]
# reset_index() renumbers the reversed rows and moves the old index into an
# 'index' column; that column and '_id' are then dropped.
df_covid = df_covid.reset_index().drop(['index','_id'], axis=1)

# stock data
# Runs at import time: reads the "historical" collection of the "stock"
# database (find() without a filter) into a DataFrame, '_id' column included.
db_stock = connect('stock')
collection_stock = db_stock.get_collection("historical")
data_stock = list(collection_stock.find())
df_stock = pd.DataFrame.from_records(data_stock)
# Distinct values of the 'code' column, in order of first appearance.
codes = df_stock.code.unique()


COLORS = ['rgb(637,657,687)', 'rgb(80,80,80)', 'rgb(100,100,100)', 'rgb(115,115,115)', 'rgb(135,67,69)',
          'rgb(189,189,189)', 'rgb(67,80,100)', 'rgb(123,33,67)', 'rgb(138,45,69)', 'rgb(167,167,167)',