def __init__(self, root):
        """Load the source and IMDB title-type configuration sheets.

        ``root`` is stored as ``self.root``. Both sheets are fetched as CSV
        exports of the same Google spreadsheet (empty cells become ``''``)
        and converted to dictionaries with ``ProUtils.pandas_df_to_dict``.
        """
        self.root = root
        self.configsheet_url = 'https://docs.google.com/spreadsheets/d/1gwtQlzk0iA4qyLzqaYEk5SggOqNZtJnSSfwnZYDNlAw/export?format=csv&gid={SheetId}&run=1'

        #
        # source configurations, keyed by 'Configname'; every entry starts
        # with enriched == False.
        sourcesUrl = self.configsheet_url.replace('{SheetId}', '284194018')
        sourceConfigDF = pd.read_csv(sourcesUrl).fillna('')
        sourceConfigDF['enriched'] = False
        self.sourcesConfigDict = ProUtils.pandas_df_to_dict(
            sourceConfigDF, 'Configname')
        self.sport_configs = {}
        self.TRUE = True

        #
        # IMDB title definitions, keyed by 'TitleType'
        titleTypesUrl = self.configsheet_url.replace('{SheetId}', '1802180540')
        titleTypesDF = pd.read_csv(titleTypesUrl).fillna('')
        self.titletypesConfigDict = ProUtils.pandas_df_to_dict(
            titleTypesDF, 'TitleType')

        #
        # runtime helpers
        self.consumerStatus = multiprocessing.Queue()
        self.sentinel = 'Done'
        self.bqUtils = BigqueryUtils()
    def imdbQueriesGenerator(self, queriesQueue, sourceConfig, startTime):
        """Queue one query job per (stat, title type, genre) combination.

        Only stat definitions whose 'Doit' is 'y' are processed. When a stat
        has 'Genres' == 'y' a job is created for each genre in the title
        type's 'GenresList'; otherwise a single job for 'All' is created.
        Each job definition is put on ``queriesQueue``. ``startTime`` is not
        used.
        """
        activeStats = (candidate
                       for candidate in sourceConfig['StatsDefDict'].values()
                       if candidate['Doit'] == 'y')

        for statDef in activeStats:
            for titleType in statDef['TitleType'].split(','):
                titletypeConfig = self.titletypesConfigDict[titleType]
                genres = (titletypeConfig['GenresList'].split(',')
                          if statDef['Genres'] == 'y' else ['All'])

                for genre in genres:
                    # work on a copy so the shared definition stays untouched
                    jobStat = statDef.copy()
                    if genre == 'All':
                        jobStat['StatCondition'] = ''
                    else:
                        jobStat['StatCondition'] = (
                            'AND STRPOS(Genres, "{}")>0'.format(genre))
                        jobStat['StatName'] = '{}.{}'.format(
                            statDef['StatName'], genre)
                    jobStat.update(TitleType=titleType,
                                   Genre=genre,
                                   StatObject=titleType)

                    #
                    # fill the query template: stat first, then source, then
                    # title-type values.
                    query = sourceConfig['query']
                    for substitutions in (jobStat, sourceConfig,
                                          titletypeConfig):
                        query = ProUtils.format_string(query, substitutions)

                    #
                    # define the destination table
                    jobStat['StatTimeframe'] = sourceConfig['StatTimeframe']
                    jobStat['StatSource'] = sourceConfig['StatSource']
                    tableName = ProUtils.format_string(targetTableFormat,
                                                       jobStat)
                    tableName = tableName.replace('.', '_').replace('-', '_')

                    queriesQueue.put({
                        'params': {
                            'query': query,
                            'targetDataset': targetDataset,
                            'targetTable': tableName,
                        },
                        'StatName': jobStat['StatName'],
                        'StatObject': titleType,
                        'StatTimeframe': sourceConfig['StatTimeframe']
                    })
Beispiel #3
0
    def __init__(self):
        """Load the content, domain and template configuration sheets.

        Each sheet is fetched as a CSV export of the same Google spreadsheet
        (empty cells become ``''``) and converted to a dictionary keyed by
        the given column.
        """
        self.configsheet_url = 'https://docs.google.com/spreadsheets/d/1hsTL7TzdtwPTBe5ZXs4PN1McQ4h5-NctW_KnV9sZQfo/export?format=csv&gid={SheetId}&run=1'

        def load_sheet(sheet_id, key_column):
            # download one sheet and index its rows by key_column
            sheet_url = self.configsheet_url.replace('{SheetId}', sheet_id)
            sheet_df = pd.read_csv(sheet_url).fillna('')
            return ProUtils.pandas_df_to_dict(sheet_df, key_column)

        self.contentConfigDict = load_sheet('742083111', 'ContentDefCode')
        self.domainsDict = load_sheet('103209122', 'Domain')
        self.templateDefsDict = load_sheet('2085630088', 'TemplateName')
Beispiel #4
0
 def get_configuration_dict(self, contentConfigCode):
     """Return the insight configuration of a content definition.

     The content definition's values are substituted into the config-sheet
     URL template, the resulting CSV is read (empty cells become ``''``) and
     the rows are returned as a dictionary keyed by 'QuestionCode'.
     """
     sheetUrl = self.configsheet_url.format(
         **self.contentConfigDict[contentConfigCode])
     questionsDF = pd.read_csv(sheetUrl).fillna('')
     return ProUtils.pandas_df_to_dict(questionsDF, 'QuestionCode')
Beispiel #5
0
 def lists_generator(self, contentConfigCode):
     """Build, save and execute the lists query of a content configuration.

     Steps:
       1. save the insights configuration to BigQuery through ``self.icm``;
       2. fill the sport's lists-query template with the content
          configuration (after passing 'StatFilter' and 'QuestionsFilter'
          through ``self.calc_filter``);
       3. write the final SQL to ``results/queries/<table_id>.sql``;
       4. execute the query into the target dataset/table.

     Returns the value of ``self.bqu.execute_query_with_schema_and_target``
     (named ``nItems`` here; presumably the number of rows, TODO confirm).
     """
     #
     # Save the insights configuration to BQ
     configTableId = self.icm.save_configuration_to_bigquery(
         contentConfigCode)
     #
     # read the query, configure and run it.
     instructions = self.icm.get_content_config(contentConfigCode)
     instructions['InsightsConfigurationTable'] = configTableId
     instructions['StatFilter'] = self.calc_filter(
         instructions['StatFilter'])
     instructions['QuestionsFilter'] = self.calc_filter(
         instructions['QuestionsFilter'])
     query = self.get_lists_query(instructions['SportCode'])
     query = ProUtils.format_string(query, instructions)
     #
     # Keep a copy of the executed SQL. The context manager guarantees the
     # file handle is closed even if the write fails.
     dataset_id, table_id = self.get_lists_dataset_and_table(
         contentConfigCode)
     queryFile = 'results/queries/{}.sql'.format(table_id)
     with open(queryFile, 'w') as f:
         f.write(query)
     #
     # Execute the query.
     nItems = self.bqu.execute_query_with_schema_and_target(
         query, dataset_id, table_id)
     return nItems
    def get_source_configuration(self, configName):
        """Return the (lazily enriched) configuration of a source.

        A configuration whose 'DoIT' is not 'y', or which has already been
        enriched, is returned as is. Otherwise the stat-definition sheet
        referenced by 'SportSheetId' is loaded (one download per sheet id,
        cached in ``self.sport_configs``), the query file is read unless a
        'query' entry already exists, and the configuration is marked as
        enriched and stored back in ``self.sourcesConfigDict``.

        Raises KeyError if ``configName`` is unknown.
        """
        sourceConfig = self.sourcesConfigDict[configName]
        if sourceConfig['DoIT'] != 'y' or sourceConfig['enriched']:
            return sourceConfig

        sheetId = sourceConfig['SportSheetId']
        #
        # read all relevant metrics
        if sheetId not in self.sport_configs:
            sheetUrl = self.configsheet_url.replace('{SheetId}', str(sheetId))
            self.sport_configs[sheetId] = pd.read_csv(sheetUrl).fillna('')
            self.sport_configs[sheetId]['SportCode'] = sourceConfig[
                'SportCode']

        sourceConfig['StatsDefDict'] = ProUtils.pandas_df_to_dict(
            self.sport_configs[sheetId], 'StatName')

        if 'query' not in sourceConfig:
            queryPath = self.root + '/Queries/' + sourceConfig['QueryFile']
            # the context manager closes the query file right after reading
            with open(queryPath, 'r') as queryFile:
                sourceConfig['query'] = queryFile.read()

        sourceConfig['enriched'] = True
        self.sourcesConfigDict[configName] = sourceConfig
        return sourceConfig
    def financeQueriesGenerator(self, queriesQueue, sourceConfig, startTime):
        """Queue one finance query job per (stat, rolling-days) combination.

        The 'Finance_Stats' dataset is created through ``self.bqUtils`` first.
        Only stat definitions whose 'Doit' is 'y' are processed, and only the
        first entry of each stat's comma-separated 'StatObject' is used. Each
        job definition is put on ``queriesQueue``. ``startTime`` is not used.
        """
        #
        # target table definitions
        datasetName = 'Finance_Stats'
        tableTemplate = 'Stat_Finance_{StatSource}_{StatName}_{StatObject}_Rolling_{RollingDays}'
        self.bqUtils.create_dataset(datasetName)

        #
        # create jobs for all relevant metrics.
        for statDef in sourceConfig['StatsDefDict'].values():
            if statDef['Doit'] == 'y':
                firstObjects = statDef['StatObject'].split(',')[:1]
                for statObject in firstObjects:
                    for rollingDays in statDef['RollingDaysList'].split(','):
                        jobStat = statDef.copy()
                        jobStat['StatObject'] = statObject

                        #
                        # fill the query template: stat, source, rolling days
                        query = sourceConfig['query']
                        for substitutions in (jobStat, sourceConfig,
                                              {'RollingDays': rollingDays}):
                            query = ProUtils.format_string(
                                query, substitutions)

                        #
                        # define the destination table
                        jobStat['StatTimeframe'] = sourceConfig[
                            'StatTimeframe']
                        jobStat['StatSource'] = sourceConfig['StatSource']
                        jobStat['RollingDays'] = rollingDays
                        tableName = ProUtils.format_string(
                            tableTemplate, jobStat)
                        tableName = tableName.replace('.', '_').replace(
                            '-', '_')

                        queriesQueue.put({
                            'params': {
                                'query': query,
                                'targetDataset': datasetName,
                                'targetTable': tableName,
                            },
                            'StatName': jobStat['StatName'],
                            'StatObject': statObject,
                            'StatTimeframe':
                            '{}_Rollingdays'.format(rollingDays)
                        })
    def two_answers_question_generator(self, questionDict, configDef):
        """Build the output dictionary of a two-answers question.

        ``questionDict`` must provide 'Stat1' and 'Stat2' (the two candidate
        answers); ``configDef`` must provide a comma-separated
        'TimeframeText'. The question text comes from Stat1's
        'Question2Objects' template, with a randomly picked timeframe text.

        Returns a dict with 'QuestionText', 'Answer1'/'Answer2',
        'Value1'/'Value2' plus the entries added by ``ProUtils.update_dict``.

        Side effect: a 'Timeframe' entry is written into
        ``questionDict['Stat1']`` (see the alias below).
        """
        stat1 = questionDict['Stat1']
        stat2 = questionDict['Stat2']
        questionTemplate = stat1['Question2Objects']
        # NOTE: this is an alias, not a copy - 'Timeframe' is added to stat1.
        questionInstructions = stat1
        timeFrameTexts = configDef['TimeframeText'].split(',')
        # pick one of the timeframe texts at random
        loc = random.randint(0, len(timeFrameTexts) - 1)
        questionInstructions['Timeframe'] = timeFrameTexts[loc]
        questionText = ProUtils.format_string(questionTemplate,
                                              questionInstructions)
        templateDict = self.icm.templateDefsDict

        # The value templates are Python expressions containing '{value}';
        # the placeholder is replaced by a lookup of the stat's 'StatValue'
        # and the expression is evaluated in this function's scope, so the
        # local names stat1 / stat2 must not be renamed.
        # NOTE(review): eval() runs template text taken from
        # self.icm.templateDefsDict - only safe while that source is trusted.
        outQuestion = {
            'QuestionText':
            questionText,
            'Answer1':
            stat1['StatObjectName'],
            'Answer2':
            stat2['StatObjectName'],
            'Value1':
            str(
                eval(templateDict[stat1['Value1Template']]['Template'].replace(
                    '{value}', "stat1['StatValue']"))),
            # Both values use the 'Value1Template' of their own stat.
            'Value2':
            str(
                eval(templateDict[stat2['Value1Template']]['Template'].replace(
                    '{value}', "stat2['StatValue']"))),
        }
        # question-level keys passed to update_dict for stat1 and questionDict
        questionKeys = [
            'ContentDefCode', 'SportCode', 'StatSource', 'slotNum', 'rankDiff',
            'StatObject', 'StatTimeframe', 'LeagueCode', 'SeasonCode',
            'CompetitionStageCode', 'MatchStageCode', 'QuestionCode',
            'StatCode', 'Description', 'numRanks', 'rankItemsCount',
            'valueRange', 'internalDenseRank', 'objectsCount', 'minValue',
            'maxValue'
        ]
        # per-answer keys passed to update_dict with a '1' / '2' argument
        # (presumably a key suffix - confirm against ProUtils.update_dict)
        statKeys = [
            'StatObjectName', 'StatFunction', 'MatchCode', 'TeamCode',
            'PlayerCode', 'StatValue', 'Count', 'DenseRank', 'TeamName',
            'PlayerName'
        ]
        ProUtils.update_dict(outQuestion, stat1, questionKeys)
        ProUtils.update_dict(outQuestion, questionDict, questionKeys)
        ProUtils.update_dict(outQuestion, stat1, statKeys, '1')
        ProUtils.update_dict(outQuestion, stat2, statKeys, '2')

        return outQuestion
 def games_days_range(self, interval, prev):
     """Return query instructions restricting a date property to a day range.

     The range starts ``interval + prev - 1`` days before today and ends
     ``prev`` days before today; both bounds are formatted as YYYYMMDD. The
     '{DateProperty}' placeholder of the condition is not filled in here.
     """
     windowStart = dt.today() - timedelta(days=interval + prev - 1)
     windowEnd = dt.today() - timedelta(days=prev)
     dateBounds = {
         'StartDate': windowStart.strftime('%Y%m%d'),
         'EndDate': windowEnd.strftime('%Y%m%d')
     }
     return {
         'StatCondition':
         ProUtils.format_string(
             '{DateProperty} BETWEEN "{StartDate}" and "{EndDate}"',
             dateBounds),
         'DaysRange':
         'N/A',
     }
    def imdbQuestionsDefGenerator(self):
        """Write the IMDB question definitions to 'imdb_questionsList.csv'.

        One question definition is produced per (stat, title type, genre)
        combination of the 'Entertainmant.IMDB' source configuration. For a
        specific genre the genre is appended to the question code and stat
        name, and 'Genre' holds the genre followed by a space.
        """
        sourceConfig = self.get_source_configuration('Entertainmant.IMDB')
        questionsList = []

        for statDef in sourceConfig['StatsDefDict'].values():
            for titleType in statDef['TitleType'].split(','):
                titletypeConfig = self.titletypesConfigDict[titleType]
                genres = (titletypeConfig['GenresList'].split(',')
                          if statDef['Genres'] == 'y' else ['All'])

                for genre in genres:
                    allGenres = genre == 'All'
                    baseCode = '{}.{}'.format(titleType, statDef['StatName'])
                    questionDef = {
                        'QuestionCode':
                        baseCode
                        if allGenres else '{}.{}'.format(baseCode, genre),
                        'StatName':
                        statDef['StatName'] if allGenres else '{}.{}'.format(
                            statDef['StatName'], genre),
                        'StatObject':
                        titleType,
                        'Genre':
                        '' if allGenres else genre + ' ',
                        'TitleType':
                        titleType,
                        'Level':
                        'Easy',
                        'Value1Template':
                        statDef['Value1Template'],
                        'Value2Template':
                        statDef['Value2Template'],
                        'ObjectDisplayName':
                        titletypeConfig['ObjectDisplayName'],
                        'QuestionNObjects':
                        '',
                    }
                    # the question text template is filled from the
                    # definition built so far
                    questionDef['Question2Objects'] = ProUtils.format_string(
                        statDef['Question2Objects'], questionDef)
                    questionsList.append(questionDef)

        columnOrder = [
            'QuestionCode', 'StatName', 'Genre', 'Level', 'ObjectDisplayName',
            'Question2Objects', 'QuestionNObjects', 'StatObject', 'TitleType',
            'Value1Template', 'Value2Template'
        ]
        pd.DataFrame(questionsList,
                     columns=columnOrder).to_csv('imdb_questionsList.csv')
    def complexQueriesGenerator(self, queriesQueue, sourceConfig, startTime):
        """Queue one query job per active complex stat definition.

        Only stat definitions whose 'Doit' is 'y' are processed. The
        comma-separated 'StatTimeframes', 'StatObjects',
        'NumeratorStatNames' and 'DenominatorStatNames' are converted with
        ``ProUtils.commastring_to_liststring`` and substituted into the query
        template, followed by the stat definition and the source
        configuration. Each job definition is put on ``queriesQueue``.
        ``startTime`` is not used.

        The stat definitions in ``sourceConfig['StatsDefDict']`` are not
        modified: the target-table instructions are built on a copy.
        """
        listKeys = ('StatTimeframes', 'StatObjects', 'NumeratorStatNames',
                    'DenominatorStatNames')
        #
        # create jobs for all relevant metrics.
        for statDef in sourceConfig['StatsDefDict'].values():

            if statDef['Doit'] != 'y':
                continue

            inst = {
                key: ProUtils.commastring_to_liststring(statDef[key])
                for key in listKeys
            }
            query = sourceConfig['query']
            query = ProUtils.format_string(query, inst)
            query = ProUtils.format_string(query, statDef)
            query = ProUtils.format_string(query, sourceConfig)
            #
            # define the destination table. Work on a copy: writing
            # StatObject / StatTimeframe / StatSource into the shared
            # definition would leak into later runs over the same config.
            instructions = statDef.copy()
            instructions['StatObject'] = statDef['StatObjects'].replace(
                ',', '_')
            instructions['StatTimeframe'] = statDef['StatTimeframes'].replace(
                ',', '_')
            instructions['StatSource'] = sourceConfig['StatSource']
            targetTable = ProUtils.format_string(targetTableFormat,
                                                 instructions).replace(
                                                     '.', '_')
            jobDefinition = {
                'params': {
                    'query': query,
                    'targetDataset': targetDataset,
                    'targetTable': targetTable,
                },
                'StatName': statDef['StatName'],
                'StatObject': instructions['StatObject'],
                'StatTimeframe': instructions['StatTimeframe']
            }
            queriesQueue.put(jobDefinition)
    def two_answers_reader(self, contentConfigCode):
        """Read the questions of a content configuration, grouped per slot.

        Returns a tuple ``(questionsDF, slotStatGroups, slotStatGroupKeys)``:
        the full questions DataFrame, a dict mapping each slot number
        (1..NumSlots) to the (QuestionCode, StatObject) groups of that slot's
        rows, and a dict mapping each slot number to the set of those group
        keys.
        """
        configDef = self.icm.get_content_config(contentConfigCode)
        #
        # read the questions
        questionsDF = self.bqUtils.execute_query_to_df(
            ProUtils.format_string(self.questionsReaderQuery, configDef))
        #
        # find all metrics within slot
        slotStatGroups = {}
        slotStatGroupKeys = {}
        for slot in range(1, configDef['NumSlots'] + 1):
            slotRows = questionsDF.query('slotNum == %d' % slot)
            groups = slotRows.groupby(['QuestionCode', 'StatObject']).groups
            slotStatGroups[slot] = groups
            slotStatGroupKeys[slot] = set(groups.keys())

        return questionsDF, slotStatGroups, slotStatGroupKeys
    def sportsQueriesGenerator(self, queriesQueue, sourceConfig, startTime):
        """Queue one query job per (stat, stat object, timeframe) combination.

        Only stat definitions whose 'Doit' is 'y' are processed. For every
        object in the stat's comma-separated 'StatObject' and every timeframe
        in the source's comma-separated 'StatTimeframe' the query template is
        filled in and a job definition is put on ``queriesQueue``.
        ``startTime`` is not used.

        The stat definitions in ``sourceConfig['StatsDefDict']`` are not
        modified: the target-table instructions are built on a copy.
        Previously 'StatObject' of the shared definition was overwritten with
        a single object, so a later run over the same configuration would see
        only that object.
        """
        #
        # create jobs for all relevant metrics.
        for statDef in sourceConfig['StatsDefDict'].values():

            if statDef['Doit'] != 'y':
                continue

            sourceDefinitions = definitions[sourceConfig['StatSource']]

            for statObject in statDef['StatObject'].split(','):
                for statTimeframe in sourceConfig['StatTimeframe'].split(','):
                    query = sourceConfig['query']
                    query = query.replace('{StatObject}', statObject)
                    query = query.replace('{StatTimeframe}', statTimeframe)
                    if sourceConfig['StatCondition'] != '':
                        # 'StatCondition' is evaluated as an expression on
                        # self and its result is used as substitutions.
                        # NOTE(review): eval() on configuration text - only
                        # safe while the configuration source is trusted.
                        query = ProUtils.format_string(
                            query,
                            eval("self." + sourceConfig['StatCondition']))
                    else:
                        # no condition configured: substitute True
                        query = ProUtils.format_string(query,
                                                       {'StatCondition': True})

                    query = ProUtils.format_string(
                        query, sourceDefinitions['StatObject'][statObject])
                    query = ProUtils.format_string(query, statDef)
                    query = ProUtils.format_string(query, sourceConfig)
                    #
                    # define the destination table (on a copy, so the shared
                    # stat definition keeps its original 'StatObject' list)
                    instructions = statDef.copy()
                    instructions['StatObject'] = statObject
                    instructions['StatTimeframe'] = statTimeframe
                    instructions['StatSource'] = sourceConfig['StatSource']
                    targetTable = ProUtils.format_string(
                        targetTableFormat, instructions).replace('.', '_')
                    jobDefinition = {
                        'params': {
                            'query': query,
                            'targetDataset': targetDataset,
                            'targetTable': targetTable,
                        },
                        'StatName': statDef['StatName'],
                        'StatObject': statObject,
                        'StatTimeframe': statTimeframe
                    }
                    queriesQueue.put(jobDefinition)
Beispiel #14
0
def one_list_generator(listName, listConfigDict, startTime=None):
    """Build a top list of stocks and return it as a JSON string.

    Args:
        listName: key of the list definition in 'lists_config.json'.
        listConfigDict: request options; the optional keys read here are
            'Sector', 'Index' ('DJI' or 'SNP', anything else falls back to
            SNP), 'MarketCapMin' (default 100), 'MarketCapMax' (default
            1000000000) and 'ListSize' (default 5, capped at 10).
        startTime: reference time for the elapsed-time prints. Defaults to
            the time of the call. (It used to default to ``dt.now()`` in the
            signature, which is evaluated once at definition time, so every
            call reported time elapsed since the module was loaded.)

    Returns:
        ``json.dumps`` of the list dictionary keyed by 'TopRank', where each
        entry is extended with 'InterestingStatements' and 'RelevantNews',
        plus a top-level 'Description'.

    Raises:
        NotFound: if ``listName`` is not defined in 'lists_config.json'.
    """
    if startTime is None:
        startTime = dt.now()

    listsDefDict = ProUtils.get_dict_from_jsonfile('lists_config.json')
    finquery = ProUtils.get_string_from_file('queries/top_lists_query.sql')
    #
    # read the query, configure and run it.
    if listName not in listsDefDict:
        raise NotFound('List {} does not exists'.format(listName))
    listConfig = listsDefDict[listName]

    instructions = {}
    instructions['StatName'] = listConfig['StatName']
    instructions['RollingDaysCondition'] = 'StatRollingDays="{}"'.format(
        listConfig['RollingDays'])

    # NOTE(review): 'Sector' and the market-cap bounds are interpolated into
    # the SQL text as is - confirm listConfigDict never carries untrusted
    # input.
    if 'Sector' in listConfigDict:
        instructions['SectorCondition'] = 'Sector="{}"'.format(
            listConfigDict['Sector'])
    else:
        instructions['SectorCondition'] = 'TRUE'

    if listConfigDict.get('Index', '') in ['DJI', 'SNP']:
        instructions['IndexCondition'] = 'is' + listConfigDict['Index']
    else:
        instructions['IndexCondition'] = 'isSNP'

    minMarketCap = listConfigDict.get('MarketCapMin', 100)
    maxMarketCap = listConfigDict.get('MarketCapMax', 1000000000)
    instructions['MarketCapCondition'] = 'MarketCap BETWEEN {} AND {}'.format(
        minMarketCap, maxMarketCap)
    instructions['ListSize'] = min(listConfigDict.get('ListSize', 5), 10)

    query = ProUtils.format_string(finquery, instructions)
    #
    # Execute the query.
    print('Starting get-top-list for {} query execution'.format(instructions),
          dt.now() - startTime)
    bqu = BigqueryUtils()
    listDF = bqu.execute_query_to_df(query)
    print(list(listDF['Symbol']))
    listDict = ProUtils.pandas_df_to_dict(listDF, 'TopRank')

    #
    # getting additional info
    print('Starting get_stock_fundamentals for {}'.format('SNP'),
          dt.now() - startTime)
    getstocks = GetStocksData()
    companiesDF = getstocks.get_stock_fundamentals(index='SNP')
    symbolList = list(companiesDF['Symbol'])
    print(
        'Starting StockMetricsCalculator for {}, {} companies'.format(
            symbolList, len(symbolList)),
        dt.now() - startTime)
    smc = StockMetricsCalculator(symbolList)
    print('Done StockMetricsCalculator', dt.now() - startTime)
    gsn = GetStockNews()
    for stockDict in listDict.values():
        stockDict['InterestingStatements'] = get_statements_for_ticker(
            stockDict['Symbol'], smc)
        stockDict['RelevantNews'] = gsn.get_stocknews_byticker(
            stockDict['Symbol'])

    listDict['Description'] = listConfig['QuestionDescription']
    print(listDict, dt.now() - startTime)
    return json.dumps(listDict)
    def get_stocknews_byticker(self,
                               tickersList,
                               nitems=50,
                               daysback=30,
                               sortby='trending'):
        """Fetch news for the given ticker(s) and summarise their sentiment.

        ``tickersList`` is turned into a comma-separated string by stripping
        brackets, quotes and spaces from its ``str()`` form. The news is
        read from a JSON file under 'temp/' when one exists for the same
        request and day, otherwise it is requested from the URL built from
        ``self.stocknews_url_template``.

        Returns a dict with:
          'NumItems'  - number of items in the news feed,
          'Sentiment' - counts per sentiment, total 'Count' and an
                        age-weighted 'Weighted' score,
          'Newsfeed'  - the items that are at most 3 days old, plus the
                        first 3 items regardless of age.
        """
        assert (sortby in ['trending', 'algo'])

        tickers = str(tickersList)
        for unwanted in ('[', ']', "'", ' '):
            tickers = tickers.replace(unwanted, '')

        urlInstructions = {
            'ticker': tickers,
            'nitems': nitems,
            'fromdate_MMDDYYYY': (date.today() - datetime.timedelta(
                days=daysback)).strftime('%m%d%Y'),
            'sortby': sortby,
            'today': date.today(),
        }
        if not os.path.exists('temp'):
            os.mkdir('temp')
        cachePath = 'temp/{ticker}-{nitems}-{fromdate_MMDDYYYY}-{sortby}-{today}.json'.format(
            **urlInstructions)
        if os.path.exists(cachePath):
            data = ProUtils.get_dict_from_jsonfile(cachePath)
        else:
            url = self.stocknews_url_template.format(**urlInstructions)
            print(url)
            response = requests.request("GET", url)
            data = json.loads(response.text)

        newsDict = data['data']

        keptFields = ('title', 'news_url', 'text', 'sentiment', 'source_name',
                      'topics')
        # (maximum age in days, weight); anything older weighs 0.05
        ageWeights = ((3, 1), (7, 0.5), (14, 0.25), (30, 0.125))
        sentimentWeight = {'Negative': -1, 'Positive': 1, 'Neutral': 0}
        sentimentDict = dict.fromkeys(
            ('Count', 'Negative', 'Positive', 'Neutral', 'Weighted'), 0)
        newsFeed = []
        startTime = dt.utcnow()

        for count, newsItem in enumerate(newsDict, start=1):
            newItem = {key: newsItem[key] for key in keptFields}
            newItem['index'] = count
            itemDate = dt.strptime(newsItem['date'],
                                   '%a, %d %b %Y %H:%M:%S %z')
            ageDays = (startTime.date() - itemDate.date()).days

            # recent items, and always the first three, go to the feed
            if ageDays <= 3 or count <= 3:
                newItem['date'] = str(itemDate.date())
                newsFeed.append(newItem)

            deltaWeight = next(
                (weight for maxAge, weight in ageWeights if ageDays <= maxAge),
                0.05)

            sentiment = newsItem['sentiment']
            sentimentDict[sentiment] += 1
            sentimentDict['Count'] += 1
            sentimentDict['Weighted'] += sentimentWeight[sentiment] * deltaWeight

        return {
            'NumItems': len(newsFeed),
            'Sentiment': sentimentDict,
            'Newsfeed': newsFeed,
        }