def __init__(self,
             eventsSampleSize=100000,
             returnInfo=ReturnInfo()):
     """
     Request an aggregate of the locations and times of the resulting events.

     @param eventsSampleSize: sample of events used to compute the location aggregate (at most 100000)
     @param returnInfo: what details (about locations) should be included in the returned information
     """
     assert eventsSampleSize <= 100000
     self.resultType = "locTimeAggr"
     self.locTimeAggrSampleSize = eventsSampleSize
     # merge the parameters produced by returnInfo for "locTimeAggr" into this object's attributes
     extraParams = returnInfo.getParams("locTimeAggr")
     vars(self).update(extraParams)
 def __init__(self,
              conceptCount=10,
              returnInfo=ReturnInfo()):
     """
     Request the top trending concepts together with their daily trending info over time.

     @param conceptCount: number of top concepts to return (at most 50)
     @param returnInfo: what details about the concepts should be included in the returned information
     """
     assert conceptCount <= 50
     self.resultType = "conceptTrends"
     self.conceptTrendsConceptCount = conceptCount
     # merge the parameters produced by returnInfo for "conceptTrends" into this object's attributes
     extraParams = returnInfo.getParams("conceptTrends")
     vars(self).update(extraParams)
Beispiel #3
0
 def __init__(self,
              articlesSampleSize=20000,
              returnInfo=ReturnInfo()):
     """
     Request an aggregate of the news sources of the resulting articles.

     @param articlesSampleSize: size of the result sample on which the aggregate is computed (at most 1000000)
     @param returnInfo: what details about the sources should be included in the returned information
     """
     assert articlesSampleSize <= 1000000
     self.resultType = "sourceAggr"
     self.sourceAggrSampleSize = articlesSampleSize
     # merge the parameters produced by returnInfo for "sourceAggr" into this object's attributes
     extraParams = returnInfo.getParams("sourceAggr")
     vars(self).update(extraParams)
 def __init__(self,
              mentionsSampleSize=20000,
              returnInfo=ReturnInfo()):
     """
     Request an aggregate of the categories of the resulting mentions.

     @param mentionsSampleSize: size of the result sample on which the aggregate is computed (at most 50000)
     @param returnInfo: what details about the categories should be included in the returned information
     """
     assert mentionsSampleSize <= 50000
     self.resultType = "categoryAggr"
     self.categoryAggrSampleSize = mentionsSampleSize
     # merge the parameters produced by returnInfo for "categoryAggr" into this object's attributes
     extraParams = returnInfo.getParams("categoryAggr")
     vars(self).update(extraParams)
Beispiel #5
0
    def __init__(self,
                 uriOrUriList,
                 source = "news",
                 type = "concept",
                 dateStart = None,
                 dateEnd = None,
                 returnInfo = ReturnInfo()):
        """
        obtain information about how frequently a concept or category is mentioned in the articles on particular dates
        Similar to GetCounts, but the output is more friendly for a larger set of provided uris/ids at once
        Usage example:
            q = GetCountsEx(type = "category")
            q.queryById(range(10))  # return trends of first 10 categories
            ret = er.execQuery(q)
        NOTE(review): the usage example above omits uriOrUriList, which this signature
        requires as a positional argument - confirm the intended usage.
        Return object:
            {
                "categoryInfo": [
                    {
                        "id": 0,
                        "label": "Root",
                        "uri": "http://www.dmoz.org"
                    },
                    {
                        "id": 1,
                        "label": "Recreation",
                        "uri": "http://www.dmoz.org/Recreation"
                    },
                    ...
                ],
                "counts": [
                    {
                        "0": 23, "1": 42, "2": 52, "3": 32, "4": 21, "5": 65, "6": 32, "7": 654, "8": 1, "9": 34,
                        "date": "2015-05-07"
                    },
                    ...
                ]
            }

        @param uriOrUriList: concept/category uri or a list of uris
        @param source: input source information from which to compute top trends. Options: "news", "social"
        @param type: what do the uris represent? "concept" or "category"
        @param dateStart: starting date from which to provide counts onwards (either None, datetime.date or "YYYY-MM-DD")
        @param dateEnd: ending date until which to provide counts (either None, datetime.date or "YYYY-MM-DD")
        @param returnInfo: what details should be included in the returned information
        """
        CountsBase.__init__(self)
        self._setVal("action", "getCountsEx")
        self._setVal("source", source)
        self._setVal("type", type)
        self._update(returnInfo.getParams())
        self._setVal("uri", uriOrUriList)
        # identity test against None (PEP 8): does not invoke a custom __eq__/__ne__ on the argument
        if dateStart is not None:
            self._setDateVal("dateStart", dateStart)
        if dateEnd is not None:
            self._setDateVal("dateEnd", dateEnd)
Beispiel #6
0
 def __init__(self, uriOrUriList=None, returnInfo=ReturnInfo()):
     """
     obtain desired information about one or more news sources
     @param uriOrUriList: single source uri or a list of source uris for which to return information.
         When None, no uri is set by the constructor.
     @param returnInfo: what details about the source should be included in the returned information
     """
     QueryParamsBase.__init__(self)
     self._setVal("action", "getInfo")
     # identity test against None (PEP 8): also safe for arguments whose != is overloaded
     if uriOrUriList is not None:
         self.queryByUri(uriOrUriList)
     self._update(returnInfo.getParams())
 def getConceptInfo(self, conceptUri,
                    returnInfo = ReturnInfo(conceptInfo = ConceptInfoFlags(
                        synonyms = True, image = True, description = True))):
     """
     Look up detailed information about a single concept.

     Sends the return-info parameters plus the concept uri to "/api/v1/concept/getInfo"
     and returns whatever self.jsonRequest returns.
     @param conceptUri: uri of the concept
     @param returnInfo: what details about the concept should be included in the returned information
     """
     requestParams = returnInfo.getParams()
     requestParams["uri"] = conceptUri
     response = self.jsonRequest("/api/v1/concept/getInfo", requestParams)
     return response
Beispiel #8
0
 def __init__(self,
              count=20,
              returnInfo=ReturnInfo()):
     """
     Query the currently top trending items for which the users provided the data
     (such data can be stock prices, energy prices, etc.).

     @param count: number of top trends to return (sent as "conceptCount")
     @param returnInfo: what details should be included in the returned information
     """
     QueryParamsBase.__init__(self)
     self._setVal("action", "getTrendingCustom")
     self._setVal("conceptCount", count)
     extraParams = returnInfo.getParams()
     self._update(extraParams)
Beispiel #9
0
 def __init__(self,
              count = 50,
              source = "concept",
              maxDayDiff = sys.maxsize,
              returnInfo = ReturnInfo()):
     """
     Request stories similar to this one.

     @param count: number of similar stories to return (at most 50)
     @param source: how to compute similarity. Options: concept cca
     @param maxDayDiff: maximum time difference between the similar stories and this one;
         the default sys.maxsize means the attribute is not set at all
     @param returnInfo: what details should be included in the returned information
     """
     assert count <= 50
     self.resultType = "similarStories"
     self.similarStoriesCount = count
     self.similarStoriesSource = source
     # only store the day limit when the caller moved it away from the sys.maxsize default
     limitGiven = maxDayDiff != sys.maxsize
     if limitGiven:
         self.similarStoriesMaxDayDiff = maxDayDiff
     extraParams = returnInfo.getParams("similarStories")
     vars(self).update(extraParams)
 def __init__(self,
              source="news",
              count=20,
              returnInfo=ReturnInfo()):
     """
     Query the currently top trending categories.

     @param source: source information from which to compute top trends. Options: "news", "social"
     @param count: number of top trends to return (sent as "categoryCount")
     @param returnInfo: what details should be included in the returned information
     """
     QueryParamsBase.__init__(self)
     self._setVal("action", "getTrendingCategories")
     self._setVal("source", source)
     self._setVal("categoryCount", count)
     extraParams = returnInfo.getParams()
     self._update(extraParams)
Beispiel #11
0
    def __init__(self,
                 date = None,
                 count = 20,
                 returnInfo = ReturnInfo()):
        """
        Build a query with action "getEvents" for a single date.

        NOTE(review): the original inline comments described the results as "top shared
        articles" while the action sent is "getEvents" - confirm which wording is intended.
        @param date: the date (either in YYYY-MM-DD or datetime.date format) for which to return results.
            If None then today is used
        @param count: number of top shared items to return
        @param returnInfo: what details should be included in the returned information
        """
        QueryParamsBase.__init__(self)
        self._setVal("action", "getEvents")
        self._setVal("count", count)
        self._update(returnInfo.getParams())

        # identity test against None (PEP 8); default to the current local date
        if date is None:
            date = datetime.date.today()
        self._setDateVal("date", date)
Beispiel #12
0
 def getLatestArticle(self, returnInfo=ReturnInfo()):
     """
     Fetch information about the latest imported article.

     The article id is taken as "totalArticleCount" - 1 from self.getRecentStats().
     Returns the "info" entry of the first item in the query result, or None when
     the result is empty.
     @param returnInfo: passed to RequestArticleInfo to select the returned details
     """
     totalCount = self.getRecentStats()["totalArticleCount"]
     query = QueryArticle.queryById(totalCount - 1)
     query.addRequestedResult(RequestArticleInfo(returnInfo))
     result = self.execQuery(query)
     if not result:
         return None
     resultKeys = list(result.keys())
     if len(resultKeys) == 0:
         return None
     return result[resultKeys[0]].get("info")
Beispiel #13
0
    def __init__(self,
                 uriOrUriList,
                 source = "news",
                 type = "concept",
                 dateStart = None,
                 dateEnd = None,
                 returnInfo = ReturnInfo()):
        """
        obtain information about how frequently a concept or category is mentioned in the articles on particular dates
        by specifying source="custom" one can obtain counts for custom concepts, such as stocks, macroeconomic indicators, etc. The uri
        for these can be found using EventRegistry.getCustomConceptUri() method.
        Usage example:
            q = GetCounts([er.getConceptUri("Obama"), er.getConceptUri("ebola")])
            ret = er.execQuery(q)
        Return object:
            {
                "http://en.wikipedia.org/wiki/Barack_Obama": [
                    {
                        "count": 1,
                        "date": "2015-05-07"
                    },
                    {
                        "count": 4,
                        "date": "2015-05-08"
                    },
                    ...
                ],
                "http://en.wikipedia.org/wiki/Ebola_virus_disease": [
                    {
                        "count": 0,
                        "date": "2015-05-07"
                    },
                    ...
                ]
            }

        @param uriOrUriList: concept/category uri or a list of uris
        @param source: input source information from which to compute top trends. Options: "news", "social", "custom", "geo" or "sentiment"
        @param type: what do the uris represent? "concept" or "category"
        @param dateStart: starting date from which to provide counts onwards (either None, datetime.date or "YYYY-MM-DD")
        @param dateEnd: ending date until which to provide counts (either None, datetime.date or "YYYY-MM-DD")
        @param returnInfo: what details should be included in the returned information
        """
        CountsBase.__init__(self)
        self._setVal("action", "getCounts")
        self._setVal("source", source)
        self._setVal("type", type)
        self._update(returnInfo.getParams())
        self._setVal("uri", uriOrUriList)
        # identity test against None (PEP 8): does not invoke a custom __eq__/__ne__ on the argument
        if dateStart is not None:
            self._setDateVal("dateStart", dateStart)
        if dateEnd is not None:
            self._setDateVal("dateEnd", dateEnd)
 def suggestCategories(self, prefix, page = 1, count = 20, returnInfo = ReturnInfo()):
     """
     Return a list of dmoz categories that contain the prefix.

     The request is sent to "/json/suggestCategories" through self.jsonRequest.
     @param prefix: input text that should be contained in the category name
     @param page: page of the results (1, 2, ...); must be above 0
     @param count: number of returned suggestions
     @param returnInfo: what details about categories should be included in the returned information
     """
     assert page > 0, "page parameter should be above 0"
     requestParams = dict(prefix=prefix, page=page, count=count)
     # parameters from returnInfo are applied after the basic ones
     requestParams.update(returnInfo.getParams())
     response = self.jsonRequest("/json/suggestCategories", requestParams)
     return response
Beispiel #15
0
 def __init__(self,
              source="news",
              count=20,
              returnInfo=ReturnInfo()):
     """
     Query the currently top trending groups of concepts.
     A group can be identified by the concept type or by a concept class uri.

     @param source: source information from which to compute top trends. Options: "news", "social"
     @param count: number of top trends to return (sent as "conceptCount")
     @param returnInfo: what details should be included in the returned information
     """
     QueryParamsBase.__init__(self)
     self._setVal("action", "getConceptTrendGroups")
     self._setVal("source", source)
     self._setVal("conceptCount", count)
     extraParams = returnInfo.getParams()
     self._update(extraParams)
    def getTopCategoryCorrelations(self,
            exactCount = 10,
            approxCount = 0,
            returnInfo = ReturnInfo()):
        """
        compute categories that correlate the most with the input data.

        When returnInfo is not None, every returned correlation item is extended with a
        "categoryInfo" entry (an empty dict when no details were obtained for its id).

        @param exactCount: the number of returned categories for which the exact value of the correlation is computed
        @param approxCount: the number of returned categories for which only an approximate value of the correlation is computed
        @param returnInfo: specifies the details about the categories that should be returned in the output result.
            Pass None to skip the category lookups
        @return: the response of the correlation request, possibly extended as described above
        """

        # generate all necessary parameters (but don't update the params of the self)
        params = QueryParamsBase.copy(self)
        # don't send unnecessary data
        params._clearVal("contextConceptIds")
        params._setVal("exactCount", exactCount)
        params._setVal("approxCount", approxCount)
        params._setVal("sourceType", "news-category")

        # compute the correlations
        ret = self._er.jsonRequest(self._getPath(), params.queryParams)

        # nothing to extend when details were not requested or the response is empty
        if returnInfo is None or not ret:
            return ret

        # collect the correlation items (exact first, then approximate); a missing section or
        # list is treated as empty instead of raising KeyError
        correlations = ret.get("news-category") or {}
        items = []
        for key in ("exactCorrelations", "approximateCorrelations"):
            items.extend(correlations.get(key) or [])

        # fetch the category details in batches of 500 ids per query
        categoryIds = [item["id"] for item in items]
        categoryInfos = {}
        for start in range(0, len(categoryIds), 500):
            q = GetCategoryInfo(returnInfo = returnInfo)
            q.queryById(categoryIds[start:start + 500])
            info = self._er.execQuery(q)
            # skip empty/None responses instead of failing inside dict.update
            if info:
                categoryInfos.update(info)

        # the details are looked up by the string form of the id
        for item in items:
            item["categoryInfo"] = categoryInfos.get(str(item["id"]), {})

        # return result
        return ret
Beispiel #17
0
 def __init__(self, keywordCount=30, maxEventsToCluster=10000, returnInfo=ReturnInfo()):
     """
     Request a hierarchical clustering of events into smaller clusters.
     2-means clustering is applied on each node in the tree.

     @param keywordCount: number of keywords to report in each of the clusters (at most 100)
     @param maxEventsToCluster: try to cluster at most this number of events (at most 10000)
     @param returnInfo: what details about the concepts should be included in the returned information
     """
     assert keywordCount <= 100
     assert maxEventsToCluster <= 10000
     self.resultType = "eventClusters"
     self.eventClustersKeywordCount = keywordCount
     self.eventClustersMaxEventsToCluster = maxEventsToCluster
     # merge the parameters produced by returnInfo for "eventClusters" into this object's attributes
     extraParams = returnInfo.getParams("eventClusters")
     vars(self).update(extraParams)
Beispiel #18
0
 def __init__(self, sourceCount=30, eventsSampleSize=50000, returnInfo=ReturnInfo()):
     """
     Request the top news sources that report about the events matching the search conditions.

     @param sourceCount: number of top sources to return (at most 200)
     @param eventsSampleSize: size of the result sample on which the aggregate is computed
         (the assert below enforces at most 100000)
     @param returnInfo: what details about the sources should be included in the returned information

     NOTE(review): this docstring previously stated a limit of 300000 for eventsSampleSize
     while the code enforces 100000 - confirm the actual server-side limit.
     """
     assert sourceCount <= 200
     assert eventsSampleSize <= 100000
     self.resultType = "sourceAggr"
     self.sourceAggrSourceCount = sourceCount
     self.sourceAggrSampleSize = eventsSampleSize
     # merge the parameters produced by returnInfo for "sourceAggr" into this object's attributes
     extraParams = returnInfo.getParams("sourceAggr")
     vars(self).update(extraParams)
Beispiel #19
0
 def __init__(self,
              conceptUris=None,
              conceptCount=10,
              returnInfo=ReturnInfo()):
     """
     return a list of top trending concepts and their daily trending info over time
     @param conceptUris: list of concept URIs for which to return trending information. If None, then top concepts will be automatically computed
     @param conceptCount: if the concepts are not provided, what should be the number of automatically determined concepts to return (at most 50)
     @param returnInfo: what details about the concepts should be included in the returned information
     """
     assert conceptCount <= 50
     self.resultType = "conceptTrends"
     # identity test against None (PEP 8): also safe for arguments whose != is overloaded
     if conceptUris is not None:
         self.conceptTrendsConceptUri = conceptUris
     self.conceptTrendsConceptCount = conceptCount
     self.__dict__.update(returnInfo.getParams("conceptTrends"))
Beispiel #20
0
 def __init__(self, sourceCount=50, normalizeBySourceArts=False, returnInfo=ReturnInfo()):
     """
     Request an aggregate of the news sources of the resulting articles.

     @param sourceCount: the number of top sources to return
     @param normalizeBySourceArts: some sources generate significantly more content than others, which is why
         they can appear as top source for a given query. If you want to normalize and sort the sources by the total number of
         articles that they have published set this to True. This will return as top sources those that potentially publish less
         content overall, but their published content is more about the searched query.
     @param returnInfo: what details about the sources should be included in the returned information

     NOTE(review): normalizeBySourceArts is accepted but is neither stored nor forwarded by
     this constructor - confirm whether it should be sent with the request.
     """
     self.resultType = "sourceAggr"
     self.sourceAggrSourceCount = sourceCount
     # merge the parameters produced by returnInfo for "sourceAggr" into this object's attributes
     extraParams = returnInfo.getParams("sourceAggr")
     vars(self).update(extraParams)
Beispiel #21
0
    def __init__(
        self,
        date=None,
        count=20,
        returnInfo=ReturnInfo()):
        """
        Build a query for the top shared articles of a single day.

        The query asks for "articles" results sorted by "socialScore", with both
        "dateStart" and "dateEnd" set to the same date.
        @param date: the date (either in YYYY-MM-DD or datetime.date format) for which to return
            top shared articles. If None then today is used
        @param count: number of top shared articles to return
        @param returnInfo: what details should be included in the returned information
        """
        QueryParamsBase.__init__(self)
        self._setVal("action", "getArticles")
        self._setVal("resultType", "articles")
        self._setVal("articlesCount", count)
        self._setVal("articlesSortBy", "socialScore")
        self._update(returnInfo.getParams("articles"))

        # identity test against None (PEP 8); default to the current local date
        if date is None:
            date = datetime.date.today()
        self._setDateVal("dateStart", date)
        self._setDateVal("dateEnd", date)
Beispiel #22
0
 def __init__(self, conceptCount=20, eventsSampleSize=100000, returnInfo=ReturnInfo()):
     """
     Request the concepts that occur most frequently in the list of resulting events.

     @param conceptCount: number of top concepts to return (at most 200)
     @param eventsSampleSize: size of the result sample on which the aggregate is computed (at most 1000000)
     @param returnInfo: what details about the concepts should be included in the returned information
     """
     assert conceptCount <= 200
     assert eventsSampleSize <= 1000000
     self.resultType = "conceptAggr"
     self.conceptAggrConceptCount = conceptCount
     self.conceptAggrSampleSize = eventsSampleSize
     # merge the parameters produced by returnInfo for "conceptAggr" into this object's attributes
     extraParams = returnInfo.getParams("conceptAggr")
     vars(self).update(extraParams)
 def __init__(self, conceptCount = 25, articlesSampleSize = 10000, returnInfo = ReturnInfo()):
     """
     Request an aggregate of the concepts of the resulting articles.

     @param conceptCount: number of top concepts to return (at most 500)
     @param articlesSampleSize: size of the result sample on which the aggregate is computed (at most 20000)
     @param returnInfo: what details about the concepts should be included in the returned information
     """
     assert conceptCount <= 500
     assert articlesSampleSize <= 20000
     self.resultType = "conceptAggr"
     self.conceptAggrConceptCount = conceptCount
     self.conceptAggrSampleSize = articlesSampleSize
     # merge the parameters produced by returnInfo for "conceptAggr" into this object's attributes
     extraParams = returnInfo.getParams("conceptAggr")
     vars(self).update(extraParams)
 def __init__(self,
              page=1,
              count=20,
              lang=mainLangs,
              sortBy="cosSim",
              sortByAsc=False,
              returnInfo=ReturnInfo(articleInfo=ArticleInfoFlags(bodyLen=200))):
     """
     Request the articles of a story.

     @param page: page of the articles (has to be >= 1)
     @param count: number of articles to return (at most 200)
     @param lang: return articles in specified language(s)
     @param sortBy: order in which story articles are sorted. Options: id (internal id), date (published date),
         cosSim (closeness to story centroid), socialScore (total shares in social media),
         facebookShares (shares on fb), twitterShares (shares on twitter)
     @param sortByAsc: stored as articlesSortByAsc; presumably selects ascending order - confirm
     @param returnInfo: what details should be included in the returned information
     """
     assert page >= 1, "page has to be >= 1"
     assert count <= 200
     self.resultType = "articles"
     self.articlesPage = page
     self.articlesCount = count
     self.articlesLang = lang
     self.articlesSortBy = sortBy
     self.articlesSortByAsc = sortByAsc
     # merge the parameters produced by returnInfo for "articles" into this object's attributes
     extraParams = returnInfo.getParams("articles")
     vars(self).update(extraParams)
Beispiel #25
0
 def __init__(self,
              source = "news",
              count = 20,
              conceptType = ["person", "org", "loc"],
              returnInfo = ReturnInfo()):
     """
     get currently top trending concepts
     @param source: source information from which to compute top trends. Options: "news", "social"
     @param count: number of top trends to return
     @param conceptType: which types of concepts are we interested in
     @param returnInfo: what details should be included in the returned information
     """
     QueryParamsBase.__init__(self)
     self._setVal("action", "getTrendingConcepts")
     self._setVal("source", source)
     self._setVal("conceptCount", count)
     # The default for conceptType is a single list object shared by every call. Hand over a
     # copy so that whatever _setVal keeps can never alias (and later mutate) that shared
     # default or the caller's own list. Non-list values are passed through untouched.
     if isinstance(conceptType, list):
         conceptType = list(conceptType)
     self._setVal("conceptType", conceptType)
     self._update(returnInfo.getParams())
 def __init__(self, conceptCount = 25, measure = "pmi", articlesSampleSize = 10000, returnInfo = ReturnInfo()):
     """
     Request an aggregate of concept co-occurrences of the resulting articles.

     @param conceptCount: how many concepts should be returned (at most 200)
     @param measure: how the interestingness between the selected pairs of concepts is computed.
         Options: pmi (pointwise mutual information), pairTfIdf (pair frequency * IDF of individual concepts), chiSquare
     @param articlesSampleSize: size of the result sample on which the aggregate is computed (at most 50000)
     @param returnInfo: what details should be included in the returned information
     """
     assert conceptCount <= 200
     assert articlesSampleSize <= 50000
     self.resultType = "conceptMatrix"
     self.conceptMatrixConceptCount = conceptCount
     self.conceptMatrixMeasure = measure
     self.conceptMatrixSampleSize = articlesSampleSize
     # merge the parameters produced by returnInfo for "conceptMatrix" into this object's attributes
     extraParams = returnInfo.getParams("conceptMatrix")
     vars(self).update(extraParams)
 def __init__(self,
              conceptUris = None,
              count = 25,
              articlesSampleSize=10000,
              returnInfo = ReturnInfo()):
     """
     get trending of concepts in the resulting articles
     @param conceptUris: list of concept URIs for which to return trending information. If None, then top concepts will be automatically computed
     @param count: if the concepts are not provided, what should be the number of automatically determined concepts to return (at most 50)
     @param articlesSampleSize: on what sample of results should the aggregate be computed (at most 50000)
     @param returnInfo: what details should be included in the returned information
     """
     assert count <= 50
     assert articlesSampleSize <= 50000
     self.resultType = "conceptTrends"
     # identity test against None (PEP 8): also safe for arguments whose != is overloaded
     if conceptUris is not None:
         self.conceptTrendsConceptUri = conceptUris
     self.conceptTrendsConceptCount = count
     self.conceptTrendsSampleSize = articlesSampleSize
     self.__dict__.update(returnInfo.getParams("conceptTrends"))
 def __init__(self, conceptCount = 25, linkCount = 50, articlesSampleSize = 10000, returnInfo = ReturnInfo()):
     """
     Request the concept graph of the resulting articles, i.e. concepts that frequently
     co-occur with other concepts.

     @param conceptCount: how many concepts should be returned (at most 1000)
     @param linkCount: how many top links between the concepts should be returned (at most 2000)
     @param articlesSampleSize: size of the result sample on which the aggregate is computed (at most 50000)
     @param returnInfo: what details about the concepts should be included in the returned information
     """
     assert conceptCount <= 1000
     assert linkCount <= 2000
     assert articlesSampleSize <= 50000
     self.resultType = "conceptGraph"
     self.conceptGraphConceptCount = conceptCount
     self.conceptGraphLinkCount = linkCount
     self.conceptGraphSampleSize = articlesSampleSize
     # merge the parameters produced by returnInfo for "conceptGraph" into this object's attributes
     extraParams = returnInfo.getParams("conceptGraph")
     vars(self).update(extraParams)
Beispiel #29
0
 def __init__(self, conceptCount=50, linkCount=150, eventsSampleSize=50000, returnInfo=ReturnInfo()):
     """
     Request the concept pairs that frequently co-occur in the resulting events.

     @param conceptCount: number of top concepts to return (at most 1,000)
     @param linkCount: number of links between the concepts to return (at most 2,000)
     @param eventsSampleSize: size of the result sample on which the aggregate is computed
         (the assert below enforces at most 300000)
     @param returnInfo: what details about the concepts should be included in the returned information

     NOTE(review): this docstring previously stated a limit of 100000 for eventsSampleSize
     while the code enforces 300000 - confirm the actual server-side limit.
     """
     assert conceptCount <= 1000
     assert linkCount <= 2000
     assert eventsSampleSize <= 300000
     self.resultType = "conceptGraph"
     self.conceptGraphConceptCount = conceptCount
     self.conceptGraphLinkCount = linkCount
     self.conceptGraphSampleSize = eventsSampleSize
     # merge the parameters produced by returnInfo for "conceptGraph" into this object's attributes
     extraParams = returnInfo.getParams("conceptGraph")
     vars(self).update(extraParams)
 def __init__(self,
              page = 1,
              count = 100,
              sortBy = "date", sortByAsc = False,
              returnInfo = ReturnInfo()):
     """
     return article details for resulting articles
     @param page: page of the articles to return (has to be >= 1)
     @param count: number of articles to return for the given page (at most 100)
     @param sortBy: how are articles sorted. Options: id (internal id), date (publishing date), cosSim (closeness to the event centroid), rel (relevance to the query), sourceImportance (manually curated score of source importance - high value, high importance), sourceImportanceRank (reverse of sourceImportance), sourceAlexaGlobalRank (global rank of the news source), sourceAlexaCountryRank (country rank of the news source), socialScore (total shares on social media), facebookShares (shares on Facebook only)
     @param sortByAsc: should the results be sorted in ascending order (True) or descending (False)
     @param returnInfo: what details should be included in the returned information
     """
     assert page >= 1, "page has to be >= 1"
     # the limit now matches the documented maximum and the assertion message; the check
     # previously let values up to 200 through while reporting a limit of 100
     assert count <= 100, "at most 100 articles can be returned per call"
     self.resultType = "articles"
     self.articlesPage = page
     self.articlesCount = count
     self.articlesSortBy = sortBy
     self.articlesSortByAsc = sortByAsc
     self.__dict__.update(returnInfo.getParams("articles"))