Beispiel #1
0
 def getConceptInfo(
         self,
         conceptUri,
         returnInfo=ReturnInfo(conceptInfo=ConceptInfoFlags(
             synonyms=True, image=True, description=True))):
     """
     Fetch detailed information about a single concept.

     @param conceptUri: uri of the concept; sent as the "uri" request parameter
     @param returnInfo: provides the base request parameters through getParams()
     @return: the result of self.jsonRequest for the "/json/concept" path
     """
     query = returnInfo.getParams()
     query.update(uri=conceptUri, action="getInfo")
     return self.jsonRequest("/json/concept", query)
 def __init__(self, articlesSampleSize=20000, returnInfo=ReturnInfo()):
     """
     Set up a "categoryAggr" result request.

     @param articlesSampleSize: stored as categoryAggrSampleSize; asserted to be at most 50000
     @param returnInfo: its getParams("categoryAggr") output is merged into the instance attributes
     """
     assert articlesSampleSize <= 50000
     self.resultType = "categoryAggr"
     self.categoryAggrSampleSize = articlesSampleSize
     extraParams = returnInfo.getParams("categoryAggr")
     vars(self).update(extraParams)
Beispiel #3
0
    def __init__(self,
                 maxArticleCount=60,
                 maxMinsBack=10 * 60,
                 mandatorySourceLocation=False,
                 lastActivityId=0,
                 returnInfo=ReturnInfo()):
        """
        Request information about recently added articles.

        @param maxArticleCount: maximum number of articles to return in a single call (max 250).
            NOTE(review): the assertion in this method accepts values up to 1000 - confirm which limit applies
        @param maxMinsBack: how far back in the history we are interested to look
        @param mandatorySourceLocation: if True then return only articles from sources for which the geographic location is known
        @param lastActivityId: another way of setting how far back in the history to look. Use it for repeated calls
            of the method: set it to the lastActivityId obtained in the last response
        @param returnInfo: its getParams("recentActivityArticles") output is merged into the query parameters
        """
        QueryParamsBase.__init__(self)

        assert maxArticleCount <= 1000

        # the values are registered in this exact order
        settings = (
            ("action", "getRecentActivity"),
            ("addEvents", False),
            ("addArticles", True),
            ("recentActivityArticlesMaxArticleCount", maxArticleCount),
            ("recentActivityArticlesMaxMinsBack", maxMinsBack),
            ("recentActivityArticlesMandatorySourceLocation", mandatorySourceLocation),
            ("recentActivityArticlesLastActivityId", lastActivityId),
        )
        for name, value in settings:
            self._setVal(name, value)

        detailParams = returnInfo.getParams("recentActivityArticles")
        self._update(detailParams)
Beispiel #4
0
    def __init__(self,
                 maxEventCount=60,
                 maxMinsBack=10 * 60,
                 mandatoryLang=None,
                 mandatoryLocation=True,
                 lastActivityId=0,
                 returnInfo=ReturnInfo()):
        """
        return info about recently modified events

        @param maxEventCount: determines the maximum number of events to return in a single call (max 250).
            NOTE(review): the assertion in this method accepts values up to 1000 - confirm which limit applies
        @param maxMinsBack: sets how much in the history are we interested to look
        @param mandatoryLang: set a lang or array of langs if you wish to only get events covered at least by the specified language.
            When None (default) the "recentActivityEventsMandatoryLang" value is not set at all
        @param mandatoryLocation: if set to True then return only events that have a known geographic location
        @param lastActivityId: this is another way of settings how much in the history are we interested to look. Set when you have repeated calls of the method. Set it to lastActivityId obtained in the last response
        @param returnInfo: its getParams("recentActivityEvents") output is merged into the query parameters
        """
        QueryParamsBase.__init__(self)

        assert maxEventCount <= 1000
        self._setVal("action", "getRecentActivity")
        self._setVal("addEvents", True)
        self._setVal("addArticles", False)
        self._setVal("recentActivityEventsMaxEventCount", maxEventCount)
        self._setVal("recentActivityEventsMaxMinsBack", maxMinsBack)
        self._setVal("recentActivityEventsMandatoryLocation",
                     mandatoryLocation)
        self._setVal("recentActivityEventsLastActivityId", lastActivityId)
        # return only events that have at least a story in the specified language
        # (identity test: None is a singleton, and "is not" cannot be hijacked by a custom __ne__)
        if mandatoryLang is not None:
            self._setVal("recentActivityEventsMandatoryLang", mandatoryLang)
        self._update(returnInfo.getParams("recentActivityEvents"))
Beispiel #5
0
 def __init__(self, count=20, returnInfo=ReturnInfo()):
     """
     Prepare a "getTrendingCustom" request.

     @param count: number of top trends to return
     @param returnInfo: specify the details of the concepts to return
     """
     QueryParamsBase.__init__(self)
     for key, value in (("action", "getTrendingCustom"),
                        ("conceptCount", count)):
         self._setVal(key, value)
     detailParams = returnInfo.getParams()
     self._update(detailParams)
Beispiel #6
0
 def __init__(self,
              lang=mainLangs,
              minArticleCosSim=-1,
              returnInfo=ReturnInfo(articleInfo=ArticleInfoFlags(bodyLen=0))):
     """
     Set up an "articleTrend" result request.

     @param lang: stored as articleTrendLang
     @param minArticleCosSim: stored as articleTrendMinArticleCosSim
     @param returnInfo: its getParams("articleTrend") output is merged into the instance attributes
     """
     for attrName, attrValue in (
             ("resultType", "articleTrend"),
             ("articleTrendLang", lang),
             ("articleTrendMinArticleCosSim", minArticleCosSim)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("articleTrend"))
 def __init__(self,
              count=25,
              articlesSampleSize=10000,
              returnInfo=ReturnInfo()):
     """
     Set up a "conceptTrends" result request.

     @param count: stored as conceptTrendsConceptCount; asserted to be at most 50
     @param articlesSampleSize: stored as conceptTrendsSampleSize; asserted to be at most 50000
     @param returnInfo: its getParams("conceptTrends") output is merged into the instance attributes
     """
     assert count <= 50
     assert articlesSampleSize <= 50000
     for attrName, attrValue in (
             ("resultType", "conceptTrends"),
             ("conceptTrendsConceptCount", count),
             ("conceptTrendsSampleSize", articlesSampleSize)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("conceptTrends"))
Beispiel #8
0
    def getTopCategoryCorrelations(self,
                                   exactCount=10,
                                   approxCount=0,
                                   returnInfo=ReturnInfo()):
        """
        compute categories that correlate the most with the input data.

        @param exactCount: the number of returned categories for which the exact value of the correlation is computed
        @param approxCount: the number of returned categories for which only an approximate value of the correlation is computed
        @param returnInfo: specifies the details about the categories that should be returned in the output result.
            Pass None to skip the additional category lookups
        @return: the response of the correlation request. When returnInfo is not None, every item under
            ret["news-category"]["exactCorrelations"] and ret["news-category"]["approximateCorrelations"]
            additionally gets a "categoryInfo" entry ({} when no details were returned for its id)
        """

        # generate all necessary parameters (but don't update the params of the self)
        params = QueryParamsBase.copy(self)
        # don't send unnecessary data
        params._clearVal("contextConceptIds")
        params._setVal("exactCount", exactCount)
        params._setVal("approxCount", approxCount)
        params._setVal("sourceType", "news-category")

        # compute the correlations
        ret = self._er.jsonRequest(self._getPath(), params.queryParams)

        # nothing to extend when no details were requested or the response is empty
        if returnInfo is None or not ret:
            return ret

        # collect the correlation items of both kinds (exact ones first);
        # either list may be empty or None, in which case it contributes nothing
        correlations = ret["news-category"]
        items = []
        for key in ("exactCorrelations", "approximateCorrelations"):
            items.extend(correlations[key] or [])

        # fetch the details about the categories (label, ...) in batches of 500 ids
        categoryIds = [item["id"] for item in items]
        categoryInfos = {}
        for start in range(0, len(categoryIds), 500):
            q = GetCategoryInfo(returnInfo=returnInfo)
            q.queryById(categoryIds[start:start + 500])
            categoryInfos.update(self._er.execQuery(q))

        # extend the return information; the lookup table is keyed by the id converted to a string
        for item in items:
            item["categoryInfo"] = categoryInfos.get(str(item["id"]), {})

        # return result
        return ret
Beispiel #9
0
 def __init__(self, sourceCount=30, eventsSampleSize=100000, returnInfo=ReturnInfo()):
     """
     Set up a "sourceAggr" result request.

     @param sourceCount: stored as sourceAggrSourceCount; asserted to be at most 200
     @param eventsSampleSize: stored as sourceAggrSampleSize; asserted to be at most 300000
     @param returnInfo: its getParams("sourceAggr") output is merged into the instance attributes
     """
     assert sourceCount <= 200
     assert eventsSampleSize <= 300000
     for attrName, attrValue in (
             ("resultType", "sourceAggr"),
             ("sourceAggrSourceCount", sourceCount),
             ("sourceAggrSampleSize", eventsSampleSize)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("sourceAggr"))
Beispiel #10
0
 def __init__(self, conceptCount=20, eventsSampleSize=100000, returnInfo=ReturnInfo()):
     """
     Set up a "conceptAggr" result request.

     @param conceptCount: stored as conceptAggrConceptCount; asserted to be at most 200
     @param eventsSampleSize: stored as conceptAggrSampleSize; asserted to be at most 3000000
     @param returnInfo: its getParams("conceptAggr") output is merged into the instance attributes
     """
     assert conceptCount <= 200
     assert eventsSampleSize <= 3000000
     for attrName, attrValue in (
             ("resultType", "conceptAggr"),
             ("conceptAggrConceptCount", conceptCount),
             ("conceptAggrSampleSize", eventsSampleSize)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("conceptAggr"))
Beispiel #11
0
 def __init__(self, keywordCount=30, maxEventsToCluster=10000, returnInfo=ReturnInfo()):
     """
     Set up an "eventClusters" result request.

     @param keywordCount: stored as eventClustersKeywordCount; asserted to be at most 100
     @param maxEventsToCluster: stored as eventClustersMaxEventsToCluster; asserted to be at most 10000
     @param returnInfo: its getParams("eventClusters") output is merged into the instance attributes
     """
     assert keywordCount <= 100
     assert maxEventsToCluster <= 10000
     for attrName, attrValue in (
             ("resultType", "eventClusters"),
             ("eventClustersKeywordCount", keywordCount),
             ("eventClustersMaxEventsToCluster", maxEventsToCluster)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("eventClusters"))
Beispiel #12
0
 def __init__(self,
              conceptCount=25,
              measure="pmi",
              articlesSampleSize=10000,
              returnInfo=ReturnInfo()):
     """
     Set up a "conceptMatrix" result request.

     @param conceptCount: stored as conceptMatrixConceptCount; asserted to be at most 200
     @param measure: measure options: pmi (pointwise mutual information),
         pairTfIdf (pair frequence * IDF of individual concepts), chiSquare
     @param articlesSampleSize: stored as conceptMatrixSampleSize; asserted to be at most 50000
     @param returnInfo: its getParams("conceptMatrix") output is merged into the instance attributes
     """
     assert conceptCount <= 200
     assert articlesSampleSize <= 50000
     for attrName, attrValue in (
             ("resultType", "conceptMatrix"),
             ("conceptMatrixConceptCount", conceptCount),
             ("conceptMatrixMeasure", measure),
             ("conceptMatrixSampleSize", articlesSampleSize)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("conceptMatrix"))
Beispiel #13
0
 def __init__(self,
              page=1,
              count=20,
              sortBy="date",
              sortByAsc=False,
              returnInfo=ReturnInfo()):
     """
     Set up an "articles" result request.

     @param page: stored as articlesPage; asserted to be at least 1
     @param count: stored as articlesCount; asserted to be at most 200
     @param sortBy: how are articles sorted. Options: id (internal id), date (publishing date),
         cosSim (closeness to the event centroid), fq (relevance to the query),
         socialScore (total shares on social media)
     @param sortByAsc: stored as articlesSortByAsc
     @param returnInfo: its getParams("articles") output is merged into the instance attributes
     """
     assert page >= 1, "page has to be >= 1"
     assert count <= 200
     for attrName, attrValue in (
             ("resultType", "articles"),
             ("articlesPage", page),
             ("articlesCount", count),
             ("articlesSortBy", sortBy),
             ("articlesSortByAsc", sortByAsc)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("articles"))
Beispiel #14
0
 def __init__(self, source="news", count=20, returnInfo=ReturnInfo()):
     """
     Prepare a "getConceptTrendGroups" request.

     @param source: source information from which to compute top trends. Options: "news", "social"
     @param count: number of top trends to return
     @param returnInfo: specify the details of the concepts to return
     """
     QueryParamsBase.__init__(self)
     for key, value in (("action", "getConceptTrendGroups"),
                        ("source", source),
                        ("conceptCount", count)):
         self._setVal(key, value)
     detailParams = returnInfo.getParams()
     self._update(detailParams)
Beispiel #15
0
 def __init__(self,
              page=1,
              count=20,
              sortBy="cosSim",
              sortByAsc=False,
              returnInfo=ReturnInfo(articleInfo=ArticleInfoFlags(bodyLen=-1))):
     """
     Set up a "duplicatedArticles" result request.

     @param page: page of the articles; asserted to be at least 1
     @param count: number of articles to return
     @param sortBy: how are the articles sorted. Options: id, date, cosSim, fq, socialScore,
         facebookShares, twitterShares
     @param sortByAsc: stored as duplicatedArticlesSortByAsc
     @param returnInfo: its getParams("duplicatedArticles") output is merged into the instance attributes
     """
     assert page >= 1, "page has to be >= 1"
     for attrName, attrValue in (
             ("resultType", "duplicatedArticles"),
             ("duplicatedArticlesPage", page),
             ("duplicatedArticlesCount", count),
             ("duplicatedArticlesSortBy", sortBy),
             ("duplicatedArticlesSortByAsc", sortByAsc)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("duplicatedArticles"))
Beispiel #16
0
 def getLatestArticle(self, returnInfo = ReturnInfo()):
     """
     return information about the latest imported article

     @param returnInfo: passed to RequestArticleInfo to select the details to return
     @return: the "info" entry of the first item in the query response,
         or None when the response is empty
     """
     stats = self.getRecentStats()
     # the article queried is the one with id totalArticleCount - 1
     latestId = stats["totalArticleCount"]-1
     q = QueryArticle.queryById(latestId)
     q.addRequestedResult(RequestArticleInfo(returnInfo))
     ret = self.execQuery(q)
     if ret:
         # dict views cannot be indexed on Python 3 (ret.keys()[0] raises TypeError there),
         # so take the first value through an iterator; this works on Python 2 as well
         return next(iter(ret.values())).get("info")
     return None
Beispiel #17
0
 def __init__(self, conceptCount = 25, 
              linkCount = 50, 
              articlesSampleSize = 10000, 
              returnInfo = ReturnInfo()):
     """
     Set up a "conceptGraph" result request.

     @param conceptCount: stored as conceptGraphConceptCount; asserted to be at most 1000
     @param linkCount: stored as conceptGraphLinkCount; asserted to be at most 2000
     @param articlesSampleSize: stored as conceptGraphSampleSize; asserted to be at most 50000
     @param returnInfo: its getParams("conceptGraph") output is merged into the instance attributes
     """
     # the limit applies to conceptCount; the previous check used the undefined
     # name "count" and raised NameError on every call
     assert conceptCount <= 1000
     assert linkCount <= 2000
     assert articlesSampleSize <= 50000
     self.resultType = "conceptGraph"
     self.conceptGraphConceptCount = conceptCount
     self.conceptGraphLinkCount = linkCount
     self.conceptGraphSampleSize = articlesSampleSize
     self.__dict__.update(returnInfo.getParams("conceptGraph"))
Beispiel #18
0
 def __init__(self, conceptCount=25, measure="pmi", eventsSampleSize=100000, returnInfo=ReturnInfo()):
     """
     Set up a "conceptMatrix" result request.

     @param conceptCount: stored as conceptMatrixConceptCount; asserted to be at most 200
     @param measure: stored as conceptMatrixMeasure
     @param eventsSampleSize: stored as conceptMatrixSampleSize; asserted to be at most 300000
     @param returnInfo: its getParams("conceptMatrix") output is merged into the instance attributes
     """
     assert conceptCount <= 200
     assert eventsSampleSize <= 300000
     for attrName, attrValue in (
             ("resultType", "conceptMatrix"),
             ("conceptMatrixConceptCount", conceptCount),
             ("conceptMatrixMeasure", measure),
             ("conceptMatrixSampleSize", eventsSampleSize)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("conceptMatrix"))
Beispiel #19
0
 def __init__(self,
              maxArticleCount=60,
              maxMinsBack=10 * 60,
              lastArticleActivityId=0,
              articlesWithLocationOnly=True,
              returnInfo=ReturnInfo()):
     """
     Set up a "recentActivity" result request for articles.

     @param maxArticleCount: stored as articleRecentActivityMaxArticleCount; asserted to be at most 1000
     @param maxMinsBack: stored as articleRecentActivityMaxMinsBack
     @param lastArticleActivityId: stored as articleRecentActivityLastArticleActivityId
     @param articlesWithLocationOnly: stored as articleRecentActivityArticlesWithLocationOnly
     @param returnInfo: its getParams("recentActivity") output is merged into the instance attributes
     """
     assert maxArticleCount <= 1000
     for attrName, attrValue in (
             ("resultType", "recentActivity"),
             ("articleRecentActivityMaxArticleCount", maxArticleCount),
             ("articleRecentActivityMaxMinsBack", maxMinsBack),
             ("articleRecentActivityLastArticleActivityId", lastArticleActivityId),
             ("articleRecentActivityArticlesWithLocationOnly", articlesWithLocationOnly)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("recentActivity"))
Beispiel #20
0
 def __init__(self,
              page=1,
              count=20,
              lang=["eng"],
              limitPerLang=-1,
              returnInfo=ReturnInfo(articleInfo=ArticleInfoFlags(bodyLen=-1))):
     """
     Set up a "similarArticles" result request.

     @param page: page of the articles; asserted to be at least 1
     @param count: number of articles to return; asserted to be at most 200
     @param lang: in which language(s) should be the similar articles
     @param limitPerLang: max number of articles per language to return (-1 for no limit)
     @param returnInfo: its getParams("similarArticles") output is merged into the instance attributes
     """
     assert page >= 1, "page has to be >= 1"
     assert count <= 200
     for attrName, attrValue in (
             ("resultType", "similarArticles"),
             ("similarArticlesPage", page),
             ("similarArticlesCount", count),
             ("similarArticlesLang", lang),
             ("similarArticlesLimitPerLang", limitPerLang)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("similarArticles"))
Beispiel #21
0
    def __init__(
        self,
        date=None,  # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
        count=20,  # number of top shared articles to return
        returnInfo=ReturnInfo()):
        """
        Prepare a "getEvents" request for a given date.

        @param date: the date (either in YYYY-MM-DD or datetime.date format) to query.
            If None then datetime.date.today() is used
        @param count: number of items to return; sent as the "count" value
        @param returnInfo: its getParams() output is merged into the query parameters
        """
        QueryParamsBase.__init__(self)
        self._setVal("action", "getEvents")
        self._setVal("count", count)
        self._update(returnInfo.getParams())

        # identity test: None is a singleton, so "is" is the reliable comparison
        if date is None:
            date = datetime.date.today()
        self._setDateVal("date", date)
Beispiel #22
0
 def __init__(
     self,
     count=20,  # number of similar stories to return
     source="concept",  # how to compute similarity. Options: concept cca
     maxDayDiff=getattr(sys, "maxint", sys.maxsize),  # what is the maximum time difference between the similar stories and this one
     returnInfo=ReturnInfo(),
     addArticleTrendInfo=False):
     """
     Set up a "similarStories" result request.

     @param count: number of similar stories to return; asserted to be at most 200
     @param source: how to compute similarity. Options: concept cca
     @param maxDayDiff: what is the maximum time difference between the similar stories and this one.
         Defaults to sys.maxint where it exists (Python 2) and to sys.maxsize otherwise
     @param returnInfo: its getParams("similarEvents") output is merged into the instance attributes
     @param addArticleTrendInfo: stored as similarEventsAddArticleTrendInfo. Added as a trailing
         parameter because the body previously read this name without it being defined,
         which raised NameError on every call
     """
     assert count <= 200
     self.resultType = "similarStories"
     # NOTE(review): the attributes below use the "similarEvents" prefix while the
     # result type is "similarStories" - confirm this is what the server expects
     self.similarEventsCount = count
     self.similarEventsSource = source
     self.similarEventsMaxDayDiff = maxDayDiff
     self.similarEventsAddArticleTrendInfo = addArticleTrendInfo
     self.__dict__.update(returnInfo.getParams("similarEvents"))
Beispiel #23
0
 def __init__(self,
              source="news",
              count=20,
              conceptType=["person", "org", "loc"],
              returnInfo=ReturnInfo()):
     """
     Prepare a "getTrendingConcepts" request.

     @param source: source information from which to compute top trends. Options: "news", "social"
     @param count: number of top trends to return
     @param conceptType: which types of concepts are we interested in
     @param returnInfo: specify the details of the concepts to return
     """
     QueryParamsBase.__init__(self)
     for key, value in (("action", "getTrendingConcepts"),
                        ("source", source),
                        ("conceptCount", count),
                        ("conceptType", conceptType)):
         self._setVal(key, value)
     detailParams = returnInfo.getParams()
     self._update(detailParams)
Beispiel #24
0
 def __init__(self, page=1, count=20, sortBy="date", sortByAsc=False, returnInfo=ReturnInfo()):
     """
     Set up an "events" result request.

     @param page: stored as eventsPage; asserted to be at least 1
     @param count: stored as eventsCount; asserted to be at most 200
     @param sortBy: how should the resulting events be sorted. Options: date (by event date),
         rel (relevance to the query), size (number of articles),
         socialScore (amount of shares in social media)
     @param sortByAsc: stored as eventsSortByAsc
     @param returnInfo: its getParams("events") output is merged into the instance attributes
     """
     assert page >= 1, "page has to be >= 1"
     assert count <= 200
     for attrName, attrValue in (
             ("resultType", "events"),
             ("eventsPage", page),
             ("eventsCount", count),
             ("eventsSortBy", sortBy),
             ("eventsSortByAsc", sortByAsc)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("events"))
Beispiel #25
0
 def __init__(
     self,
     count=20,  # number of similar stories to return
     source="concept",  # how to compute similarity. Options: concept, cca
     lang=["eng"],  # in which language should be the similar stories
     maxDayDiff=getattr(sys, "maxint", sys.maxsize),  # what is the maximum time difference between the similar stories and this one
     returnInfo=ReturnInfo()):
     """
     Set up a "similarStories" result request.

     @param count: number of similar stories to return; asserted to be at most 200
     @param source: how to compute similarity. Options: concept, cca
     @param lang: in which language should be the similar stories
     @param maxDayDiff: what is the maximum time difference between the similar stories and this one.
         Defaults to sys.maxint where it exists (Python 2) and to sys.maxsize otherwise,
         because sys.maxint is not available on Python 3
     @param returnInfo: its getParams("similarStories") output is merged into the instance attributes
     """
     assert count <= 200
     self.resultType = "similarStories"
     self.similarStoriesCount = count
     self.similarStoriesSource = source
     self.similarStoriesLang = lang
     self.similarStoriesMaxDayDiff = maxDayDiff
     self.__dict__.update(returnInfo.getParams("similarStories"))
Beispiel #26
0
 def __init__(self,
              uriOrUriList = None, # concept/category uri or a list of uris
              source = "news",   # input source information from which to compute top trends. Options: "news", "social", "custom", "geo" or "sentiment"
              type = "concept",  # what do the uris represent? "concept" or "category"
              startDate = None,  # starting date from which to provide counts onwards (either None, datetime.date or "YYYY-MM-DD")
              endDate = None,    # ending date until which to provide counts (either None, datetime.date or "YYYY-MM-DD")
              returnInfo = ReturnInfo()):     # specify the details of the concepts/categories to return
     """
     Prepare a "getCounts" request.

     @param uriOrUriList: concept/category uri or a list of uris; when not None it is passed to queryByUri
     @param source: input source information from which to compute top trends.
         Options: "news", "social", "custom", "geo" or "sentiment"
     @param type: what do the uris represent? "concept" or "category"
     @param startDate: starting date from which to provide counts onwards (either None, datetime.date or "YYYY-MM-DD")
     @param endDate: ending date until which to provide counts (either None, datetime.date or "YYYY-MM-DD")
     @param returnInfo: specify the details of the concepts/categories to return
     """
     CountsBase.__init__(self)
     self._setVal("action", "getCounts")
     self._setVal("source", source)
     self._setVal("type", type)
     self._update(returnInfo.getParams())
     # identity tests: None is a singleton, so "is not" is the reliable comparison
     if uriOrUriList is not None:
         self.queryByUri(uriOrUriList)
     # the date range is set as soon as at least one of the two bounds is given
     if startDate is not None or endDate is not None:
         self.setDateRange(startDate, endDate)
Beispiel #27
0
 def __init__(self,
              page=1,
              count=20,
              lang=mainLangs,
              sortBy="cosSim",
              sortByAsc=False,
              returnInfo=ReturnInfo(articleInfo=ArticleInfoFlags(bodyLen=200))):
     """
     Set up an "articles" result request.

     @param page: page of the articles; asserted to be at least 1
     @param count: number of articles to return; asserted to be at most 200
     @param lang: return articles in specified language(s)
     @param sortBy: order in which story articles are sorted. Options: id (internal id),
         date (published date), cosSim (closeness to story centroid),
         socialScore (total shares in social media), facebookShares (shares on fb),
         twitterShares (shares on twitter)
     @param sortByAsc: stored as articlesSortByAsc
     @param returnInfo: its getParams("articles") output is merged into the instance attributes
     """
     assert page >= 1, "page has to be >= 1"
     assert count <= 200
     for attrName, attrValue in (
             ("resultType", "articles"),
             ("articlesPage", page),
             ("articlesCount", count),
             ("articlesLang", lang),
             ("articlesSortBy", sortBy),
             ("articlesSortByAsc", sortByAsc)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("articles"))
Beispiel #28
0
 def __init__(
     self,
     count=20,  # number of similar events to return
     source="concept",  # how to compute similarity. Options: concept cca
     maxDayDiff=getattr(sys, "maxint", sys.maxsize),  # what is the maximum time difference between the similar events and this one
     addArticleTrendInfo=False,  # add info how the articles in the similar events are distributed over time
     aggrHours=6,  # if similarEventsAddArticleTrendInfo == True then this is the aggregating window
     includeSelf=False,  # should the info about the event itself be included among the results
     returnInfo=ReturnInfo()):
     """
     Set up a "similarEvents" result request.

     @param count: number of similar events to return; asserted to be at most 200
     @param source: how to compute similarity. Options: concept cca
     @param maxDayDiff: what is the maximum time difference between the similar events and this one.
         Defaults to sys.maxint where it exists (Python 2) and to sys.maxsize otherwise,
         because sys.maxint is not available on Python 3
     @param addArticleTrendInfo: add info how the articles in the similar events are distributed over time
     @param aggrHours: if similarEventsAddArticleTrendInfo == True then this is the aggregating window
     @param includeSelf: should the info about the event itself be included among the results
     @param returnInfo: its getParams("similarEvents") output is merged into the instance attributes
     """
     assert count <= 200
     self.resultType = "similarEvents"
     self.similarEventsCount = count
     self.similarEventsSource = source
     self.similarEventsMaxDayDiff = maxDayDiff
     self.similarEventsAddArticleTrendInfo = addArticleTrendInfo
     self.similarEventsAggrHours = aggrHours
     self.similarEventsIncludeSelf = includeSelf
     self.__dict__.update(returnInfo.getParams("similarEvents"))
Beispiel #29
0
 def __init__(self,
              maxEventCount=60,
              maxMinsBack=10 * 60,
              lastEventActivityId=0,
              lang="eng",
              eventsWithLocationOnly=True,
              eventsWithLangOnly=False,
              minAvgCosSim=0,
              returnInfo=ReturnInfo()):
     """
     Set up a "recentActivity" result request for events.

     @param maxEventCount: stored as eventsRecentActivityMaxEventCount; asserted to be at most 1000
     @param maxMinsBack: stored as eventsRecentActivityMaxMinsBack
     @param lastEventActivityId: stored as eventsRecentActivityLastEventActivityId
     @param lang: the language in which title should be returned
     @param eventsWithLocationOnly: return only events for which we've recognized their location
     @param eventsWithLangOnly: return only event that have a cluster at least in the lang language
     @param minAvgCosSim: the minimum avg cos sim of the events to be returned
         (events with lower quality should not be included)
     @param returnInfo: its getParams("recentActivity") output is merged into the instance attributes
     """
     assert maxEventCount <= 1000
     for attrName, attrValue in (
             ("resultType", "recentActivity"),
             ("eventsRecentActivityMaxEventCount", maxEventCount),
             ("eventsRecentActivityMaxMinsBack", maxMinsBack),
             ("eventsRecentActivityLastEventActivityId", lastEventActivityId),
             ("eventsRecentActivityEventLang", lang),
             ("eventsRecentActivityEventsWithLocationOnly", eventsWithLocationOnly),
             ("eventsRecentActivityEventsWithLangOnly", eventsWithLangOnly),
             ("eventsRecentActivityMinAvgCosSim", minAvgCosSim)):
         setattr(self, attrName, attrValue)
     vars(self).update(returnInfo.getParams("recentActivity"))
Beispiel #30
0
 def suggestCustomConcepts(self,
                           prefix,
                           lang="eng",
                           conceptLang="eng",
                           page=1,
                           count=20,
                           returnInfo=ReturnInfo()):
     """
     Return a list of custom concepts that contain the given prefix.

     Custom concepts are the things (indicators, stock prices, ...) for which we import
     daily trending values that can be obtained using the GetCounts class.

     @param prefix: sent as the "prefix" request parameter
     @param lang: sent as the "lang" request parameter
     @param conceptLang: sent as the "conceptLang" request parameter
     @param page: sent as the "page" request parameter; asserted to be above 0
     @param count: sent as the "count" request parameter
     @param returnInfo: its getParams() output is merged into the request parameters
     @return: the result of self.jsonRequest for the "/json/suggestCustomConcepts" path
     """
     assert page > 0, "page parameter should be above 0"
     query = dict(prefix=prefix, lang=lang, conceptLang=conceptLang,
                  page=page, count=count)
     query.update(returnInfo.getParams())
     return self.jsonRequest("/json/suggestCustomConcepts", query)