Example #1
 def test_urlbuilder_v4(self):
     date_sequence = '2001 Mar 01'
     ranger_output = dateRanger(date_sequence)
     gdeltstring_output = gdeltRangeString(ranger_output, version=1)
     urlbuilder_test = urlBuilder(gdeltstring_output, version=1)
     exp = 'http://data.gdeltproject.org/events/2001.zip'
     return self.assertEqual(exp, urlbuilder_test, "Version 1 Url works.")
Example #2
    def test_urlbuilder_v2(self):
        date_sequence = '2016 10 01'
        ranger_output = dateRanger(date_sequence)
        gdeltstring_output = gdeltRangeString(ranger_output, version=2)
        urlbuilder_test = urlBuilder(gdeltstring_output, version=2)
        exp = 'http://data.gdeltproject.org/gdeltv2/20161001234500.export.CSV.zip'

        return self.assertEqual(exp, urlbuilder_test, "Version 2 Url works.")
Example #3
 def test_urlbuilder_graph2_pass(self):
     date_sequence = '2015 Apr 01'
     ranger_output = dateRanger(date_sequence)
     gdeltstring_output = gdeltRangeString(ranger_output, version=2)
     urlbuilder_test = urlBuilder(gdeltstring_output,
                                  table='gkg',
                                  version=2)
     exp = 'http://data.gdeltproject.org/gdeltv2/20150401234500.gkg.csv.zip'
     return self.assertEqual(exp, urlbuilder_test, "Version 2 Url works.")
Example #4
 def test_urlbuilder_v5(self):
     date_sequence = '2013 Apr 01'
     ranger_output = dateRanger(date_sequence)
     gdeltstring_output = gdeltRangeString(ranger_output, version=1)
     urlbuilder_test = urlBuilder(gdeltstring_output,
                                  table='gkg',
                                  version=1)
     exp = 'http://data.gdeltproject.org/gkg/20130401.gkg.csv.zip'
     return self.assertEqual(exp, urlbuilder_test, "Version 1 Url works.")
Example #5
 def test_gdeltrange_sequence_v2(self):
     date_sequence = ['2016 10 01', '2016 10 05']
     ranger_output = dateRanger(date_sequence)
     gdeltstring_test = np.sort(np.array(gdeltRangeString(ranger_output, version=2)))
     exp = np.sort(np.array(['20161001234500',
                             '20161002234500',
                             '20161003234500',
                             '20161004234500',
                             '20161005234500']))
     np.testing.assert_array_equal(exp, gdeltstring_test)
Example #6
 def test_urlbuilder_events1_passlist(self):
     date_sequence = ['2015 Apr 01', '2015 Apr 02']
     ranger_output = dateRanger(date_sequence)
     gdeltstring_output = gdeltRangeString(ranger_output, version=1)
     urlbuilder_test = urlBuilder(gdeltstring_output,
                                  table='events',
                                  version=1)
     exp = np.sort(
         np.array([
             'http://data.gdeltproject.org/events/20150401.export.CSV.zip',
             'http://data.gdeltproject.org/events/20150402.export.CSV.zip'
         ]))
     return np.testing.assert_array_equal(
         np.sort(np.array(exp)), np.sort(np.array(urlbuilder_test)))
Example #7
 def test_gdeltrange_sequence_v2_with_coverage(self):
     date_sequence = ['2016 10 01']
     ranger_output = dateRanger(date_sequence)
     gdeltstring_test = np.sort(np.array(gdeltRangeString(ranger_output, coverage=True, version=2)))
     exp = np.sort(np.array(['20161001000000', '20161001001500', '20161001003000',
                             '20161001004500', '20161001010000', '20161001011500',
                             '20161001013000', '20161001014500', '20161001020000',
                             '20161001021500', '20161001023000', '20161001024500',
                             '20161001030000', '20161001031500', '20161001033000',
                             '20161001034500', '20161001040000', '20161001041500',
                             '20161001043000', '20161001044500', '20161001050000',
                             '20161001051500', '20161001053000', '20161001054500',
                             '20161001060000', '20161001061500', '20161001063000',
                             '20161001064500', '20161001070000', '20161001071500',
                             '20161001073000', '20161001074500', '20161001080000',
                             '20161001081500', '20161001083000', '20161001084500',
                             '20161001090000', '20161001091500', '20161001093000',
                             '20161001094500', '20161001100000', '20161001101500',
                             '20161001103000', '20161001104500', '20161001110000',
                             '20161001111500', '20161001113000', '20161001114500',
                             '20161001120000', '20161001121500', '20161001123000',
                             '20161001124500', '20161001130000', '20161001131500',
                             '20161001133000', '20161001134500', '20161001140000',
                             '20161001141500', '20161001143000', '20161001144500',
                             '20161001150000', '20161001151500', '20161001153000',
                             '20161001154500', '20161001160000', '20161001161500',
                             '20161001163000', '20161001164500', '20161001170000',
                             '20161001171500', '20161001173000', '20161001174500',
                             '20161001180000', '20161001181500', '20161001183000',
                             '20161001184500', '20161001190000', '20161001191500',
                             '20161001193000', '20161001194500', '20161001200000',
                             '20161001201500', '20161001203000', '20161001204500',
                             '20161001210000', '20161001211500', '20161001213000',
                             '20161001214500', '20161001220000', '20161001221500',
                             '20161001223000', '20161001224500', '20161001230000',
                             '20161001231500', '20161001233000', '20161001234500'],
                            dtype='<U14'))
     np.testing.assert_array_equal(exp, gdeltstring_test)
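
# A sketch (an assumption, not part of the test above) showing that the 96
# hard-coded quarter-hour strings can be generated with pandas rather than
# typed out by hand; it relies only on the public pandas API.
import pandas as pd

expected = pd.date_range('2016-10-01 00:00', '2016-10-01 23:45',
                         freq='15min').strftime('%Y%m%d%H%M%S').tolist()
assert len(expected) == 96 and expected[-1] == '20161001234500'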
Example #8
    def Search(self,
               date,
               table='events',
               coverage=False,
               translation=False,
               output=None,
               queryTime=None,
               normcols=False):
        """Core searcher method to set parameters for GDELT data searches

        Keyword arguments
        -----------------
        date : str, required
            The string representation of a single datetime, or a date range
            given as a list of strings, identifying the timeline(s) of GDELT
            data to pull.

        table : string, {'events', 'gkg', 'mentions'}
            Select from the table formats offered by the GDELT service:

                * events (1.0 and 2.0)

                    The biggest difference between 1.0 and 2.0 is the
                    update frequency.  1.0 data is disseminated daily, with
                    the most recent data published at 6AM Eastern Standard
                    Time the next day; 21 August 2016 results, for example,
                    would be available 22 August 2016 at 6AM EST.  2.0 data
                    updates every 15 minutes throughout the current day.


                    Version 1.0 runs from January 1, 1979 through March 31,
                    2013 and contains 57 fields for each record. The Daily
                    Updates collection, which begins April 1, 2013 and runs
                    through the present, contains an additional field at the
                    end of each record, for a total of 58 fields per record.
                    The format is dyadic CAMEO format, capturing two actors
                    and the action performed by Actor1 upon Actor2.

                    Version 2.0 only covers February 19, 2015 onwards,
                    and is stored in an expanded version of the dyadic CAMEO
                    format.  See
                    http://data.gdeltproject.org/documentation/GDELT-Event_Codebook-V2.0.pdf
                    for more information.

                * gkg  (1.0 and 2.0)

                    **Warning** These tables and queries can be extremely
                    large and consume a lot of RAM. Consider running a
                    single day's worth of gkg pulls, storing to disk,
                    flushing RAM, then proceeding to the next day.

                    Table that represents all of the latent dimensions,
                    geography, and network structure of the global news. It
                    applies an array of highly sophisticated natural language
                    processing algorithms to each document to compute a range
                    of codified metadata encoding key latent and contextual
                    dimensions of the document.  Version 2.0 includes the
                    Global Content Analysis Measures (GCAM), which
                    reportedly provides 24 emotional measurement packages
                    that assess more than 2,300 emotions and themes from
                    every article in realtime, with multilingual dimensions
                    natively assessing the emotions of 15 languages (Arabic,
                    Basque, Catalan, Chinese, French, Galician, German,
                    Hindi, Indonesian, Korean, Pashto, Portuguese, Russian,
                    Spanish, and Urdu). See the GKG 1.0 documentation at
                    http://data.gdeltproject.org/documentation/GDELT-Global_Knowledge_Graph_Codebook.pdf
                    and GKG 2.0 at
                    http://data.gdeltproject.org/documentation/GDELT-Global_Knowledge_Graph_Codebook-V2.1.pdf.

                * mentions  (2.0 only)

                    The mentions table records every mention of an event
                    over time, along with the timestamp at which the article
                    was published. This allows the progression of an event
                    through the global media to be tracked, identifying
                    outlets that tend to break certain kinds of events the
                    earliest or that may break stories later but are more
                    accurate in their reporting on those events. Combined
                    with the 15 minute update resolution and GCAM, this also
                    allows the emotional reaction and resonance of an event
                    to be assessed as it sweeps through the world's media.

        coverage : bool, default: False
            When set to True and the GDELT version parameter is set to 2,
            gdeltPyR pulls back every 15 minute interval in the day (full
            results) or, if pulling for the current day, all 15 minute
            intervals up to the most recent one of the current hour.  For
            example, if the current date is 22 August 2016 and the current
            time is 0828 HRs Eastern, the pull retrieves every 15 minute
            interval of the day up to 0815 HRs.  When coverage is set to
            True and a date range is entered, every 15 minute interval is
            pulled for historical days, and up to the most recent 15 minute
            interval for the current day, if that day is included.
            
        translation : bool, default: False
            Whether or not to pull the translation database available in
            version 2 of GDELT. If translation is True, the translated set
            is downloaded; if False, the English set is downloaded.

        queryTime : str, optional
            This records the system time when gdeltPyR's query was executed,
            which can be used for logging purposes.  Defaults to the current
            system time at the moment of the call.

        output : string, {None, 'df', 'gpd', 'shp', 'shapefile', 'json',
                'geojson', 'r', 'geodataframe'}
            Select the output format for the returned GDELT data

            Options
            -------

            json - Javascript Object Notation output; returns list of
            dictionaries in Python or a list of json objects

            r - writes the cross language dataframe to the current directory.
            This uses the Feather library found at https://github.com/wesm/
            feather.  This option returns a pandas dataframe but writes the R
            dataframe to the current working directory. The filename
            includes all the parameters used to launch the query: version,
            coverage, table name, query dates, and query time.

            csv - Outputs a CSV format; all dates and columns are joined

            shp - Writes an ESRI shapefile to the current directory or path;
            output is filtered to exclude rows with no latitude or longitude

            geojson -

            geodataframe - Returns a geodataframe; output is filtered to
            exclude rows with no latitude or longitude.  This output can be
            manipulated for geoprocessing/geospatial operations such as
            reprojecting the coordinates, creating a thematic map (choropleth
            map), merging with other geospatial objects, etc.  See
            http://geopandas.org/ for info.

        normcols : bool, default: False
            Applies a generic lambda function to normalize GDELT columns
            for compatibility with SQL or Shapefile outputs.

        Examples
        --------
        >>> import gdelt
        >>> gd = gdelt.gdelt(version=1)
        >>> results = gd.Search(['2016 10 19'],table='events',coverage=True)
        >>> print(len(results))
        244767
        >>> gd = gdelt.gdelt(version=2)
        >>> results = gd.Search(['2016 Oct 10'], table='gkg')
        >>> print(len(results))
        2398
        >>> print(results.V2Persons.ix[2])
        Juanita Broaddrick,1202;Monica Lewinsky,1612;Donald Trump,12;
        Donald Trump,244;Wolf Blitzer,1728;Lucianne Goldberg,3712;
        Linda Tripp,3692;Bill Clinton,47;Bill Clinton,382;Bill Clinton,563;
        Bill Clinton,657;Bill Clinton,730;Bill Clinton,1280;
        Bill Clinton,2896;Bill Clinton,3259;Bill Clinton,4142;
        Bill Clinton,4176;Bill Clinton,4342;Ken Starr,2352;Ken Starr,2621;
        Howard Stern,626;Howard Stern,4286;Robin Quivers,4622;
        Paula Jones,3187;Paula Jones,3808;Gennifer Flowers,1594;
        Neil Cavuto,3362;Alicia Machado,1700;Hillary Clinton,294;
        Hillary Clinton,538;Hillary Clinton,808;Hillary Clinton,1802;
        Hillary Clinton,2303;Hillary Clinton,4226
        >>> results = gd.Search(['2016 Oct 10'], table='gkg',output='r')
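
        The following is an illustrative sketch rather than a recorded
        session (result sizes are omitted because they depend on the query
        date); it shows the 2.0 mentions table with full 15 minute coverage
        and a geodataframe output:

        >>> gd2 = gdelt.gdelt(version=2)
        >>> mentions = gd2.Search(['2016 Oct 10'], table='mentions',
        ...                       coverage=True)
        >>> geo = gd2.Search(['2016 Oct 10'], table='events',
        ...                  output='geodataframe')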

        Notes
        -----
        Read more about GDELT data at http://gdeltproject.org/data.html

        gdeltPyR retrieves Global Database of Events, Language, and Tone
        (GDELT) data (version 1.0 or version 2.0) via parallel HTTP GET
        requests and is an alternative to accessing GDELT
        data via Google BigQuery.

        Performance will vary based on the number of available cores
        (i.e. CPUs), internet connection speed, and available RAM. For
        systems with limited RAM, later iterations of gdeltPyR will include
        an option to store the output directly to disk.

        """
        date_input_check(date, self.version)
        self.coverage = coverage
        self.date = date
        version = self.version
        baseUrl = self.baseUrl
        if queryTime is None:
            # record the system time at call time, not at definition time
            queryTime = datetime.datetime.now().strftime('%m-%d-%Y %H:%M:%S')
        self.queryTime = queryTime
        self.table = table
        self.translation = translation
        self.datesString = gdeltRangeString(dateRanger(self.date),
                                            version=version,
                                            coverage=self.coverage)

        #################################
        # R dataframe check; fail early
        #################################
        if output == 'r':
            try:
                import feather

            except ImportError:
                raise ImportError(('You need to install `feather` in order '
                                   'to output data as an R dataframe. Keep '
                                   'in mind the function will return a '
                                   'pandas dataframe but write the R '
                                   'dataframe to your current working '
                                   'directory as a `.feather` file.  Install '
                                   'by running\npip install feather\nor if '
                                   'you have Anaconda (preferred)\nconda '
                                   'install feather-format -c conda-forge\nTo '
                                   'learn more about the library visit https:/'
                                   '/github.com/wesm/feather'))

        ##################################
        # Partial Functions
        #################################

        v1RangerCoverage = partial(gdeltRangeString, version=1, coverage=True)
        v2RangerCoverage = partial(gdeltRangeString, version=2, coverage=True)
        v1RangerNoCoverage = partial(gdeltRangeString,
                                     version=1,
                                     coverage=False)
        v2RangerNoCoverage = partial(gdeltRangeString,
                                     version=2,
                                     coverage=False)
        urlsv1gkg = partial(urlBuilder, version=1, table='gkg')
        urlsv2mentions = partial(urlBuilder,
                                 version=2,
                                 table='mentions',
                                 translation=self.translation)
        urlsv2events = partial(urlBuilder,
                               version=2,
                               table='events',
                               translation=self.translation)
        urlsv1events = partial(urlBuilder, version=1, table='events')
        urlsv2gkg = partial(urlBuilder,
                            version=2,
                            table='gkg',
                            translation=self.translation)

        eventWork = partial(mp_worker, table='events')
        codeCams = partial(cameos, codes=codes)
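        # The url* partials above pre-bind version, table, and (for 2.0)
        # translation, so building a download list below only requires the
        # normalized date strings; eventWork and codeCams likewise pre-bind
        # the table name and the CAMEO code lookup used later.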

        #####################################
        # GDELT Version 2.0 Headers
        #####################################

        if int(self.version) == 2:
            ###################################
            # Download 2.0 Headers
            ###################################

            self.events_columns = events2Heads()
            self.mentions_columns = mentionsHeads()
            self.gkg_columns = gkgHeads()

        #####################################
        # GDELT Version 1.0 Analytics, Header, Downloads
        #####################################

        if int(self.version) == 1:

            if self.table is "mentions":
                raise BaseException('GDELT 1.0 does not have the "mentions"'
                                    ' table. Specify the "events" or "gkg"'
                                    'table.')
            else:
                pass

            self.events_columns = events1Heads()
            columns = self.events_columns

            if self.table == 'gkg':
                self.download_list = (urlsv1gkg(
                    v1RangerCoverage(dateRanger(self.date))))

            elif self.table == 'events' or self.table == '':

                if self.coverage is True:

                    self.download_list = (urlsv1events(
                        v1RangerCoverage(dateRanger(self.date))))

                else:
                    # print("I'm here at line 125")
                    self.download_list = (urlsv1events(
                        v1RangerNoCoverage(dateRanger(self.date))))

            else:
                raise Exception('You entered an incorrect table type for '
                                'GDELT 1.0.')
        #####################################
        # GDELT Version 2.0 Analytics and Download
        #####################################
        elif int(self.version) == 2:

            if self.table == 'events' or self.table == '':
                columns = self.events_columns
                if self.coverage is True:

                    self.download_list = (urlsv2events(
                        v2RangerCoverage(dateRanger(self.date))))
                else:

                    self.download_list = (urlsv2events(
                        v2RangerNoCoverage(dateRanger(self.date))))

            if self.table == 'gkg':
                columns = self.gkg_columns
                if self.coverage is True:

                    self.download_list = (urlsv2gkg(
                        v2RangerCoverage(dateRanger(self.date))))
                else:
                    self.download_list = (urlsv2gkg(
                        v2RangerNoCoverage(dateRanger(self.date))))
                    # print ("2 gkg", urlsv2gkg(self.datesString))

            if self.table == 'mentions':
                columns = self.mentions_columns
                if self.coverage is True:

                    self.download_list = (urlsv2mentions(
                        v2RangerCoverage(dateRanger(self.date))))

                else:

                    self.download_list = (urlsv2mentions(
                        v2RangerNoCoverage(dateRanger(self.date))))

        #########################
        # DEBUG Print Section
        #########################

        # if isinstance(self.datesString,str):
        #     if parse(self.datesString) < datetime.datetime.now():
        #         self.datesString = (self.datesString[:8]+"234500")
        # elif isinstance(self.datesString,list):
        #     print("it's a list")
        # elif isinstance(self.datesString,np.ndarray):
        #     print("it's an array")
        # else:
        #     print("don't know what it is")
        # print (self.version,self.download_list,self.date, self.table, self.coverage, self.datesString)
        #
        # print (self.download_list)
        # if self.coverage:
        #     coverage = 'True'
        # else:
        #     coverage = 'False'
        # if isinstance(self.date, list):
        #
        #     formattedDates = ["".join(re.split(' |-|;|:', l)) for l in
        #                       self.date]
        #     path = formattedDates
        #     print("gdeltVersion_" + str(self.version) +
        #           "_coverage_" + coverage + "_" +
        #           "_table_" + self.table + '_queryDates_' +
        #           "_".join(path) +
        #           "_queryTime_" +
        #           datetime.datetime.now().strftime('%m-%d-%YT%H%M%S'))
        # else:
        #     print("gdeltVersion_" + str(self.version) +
        #           "_coverage_" + coverage + "_" +
        #           "_table_" + self.table + '_queryDates_' +
        #           "".join(re.split(' |-|;|:', self.date)) +
        #           "_queryTime_" +
        #           datetime.datetime.now().strftime('%m-%d-%YT%H%M%S'))

        #########################
        # Download section
        #########################
        # print(self.download_list,type(self.download_list))

        # from gdelt.extractors import normalpull
        # e=ProcessPoolExecutor()
        # if isinstance(self.download_list,list) and len(self.download_list)==1:
        #     from gdelt.extractors import normalpull
        #
        #     results=normalpull(self.download_list[0],table=self.table)
        # elif isinstance(self.download_list,list):
        #     print(table)
        #     multilist = list(e.map(normalpull,self.download_list))
        #     results = pd.concat(multilist)
        # print(results.head())

        if isinstance(self.datesString, str):
            if self.table == 'events':

                results = eventWork(self.download_list)
            else:
                # if self.table =='gkg':
                #     results = eventWork(self.download_list)
                #
                # else:
                results = mp_worker(self.download_list)

        else:

            if self.table == 'events':

                pool = Pool(processes=cpu_count())
                downloaded_dfs = list(
                    pool.imap_unordered(eventWork, self.download_list))
            else:

                pool = NoDaemonProcessPool(processes=cpu_count())
                downloaded_dfs = list(
                    pool.imap_unordered(mp_worker, self.download_list))
            pool.close()
            pool.terminate()
            pool.join()
            results = pd.concat(downloaded_dfs)
            del downloaded_dfs
            results.reset_index(drop=True, inplace=True)

        # print(results.columns,columns,self.table,self.version)
        if self.table == 'gkg' and int(self.version) == 1:
            results.columns = results.iloc[0].values.tolist()
            results.drop([0], inplace=True)
            columns = results.columns
        if len(results.columns) == 57:
            results.columns = columns[:-1]

        else:
            results.columns = columns

        if (len(results)) == 0:
            raise ValueError("This GDELT query returned no data. Check "
                             "internet connection or query parameters and "
                             "retry")

        # Add column of human readable codes; need updated CAMEO
        if self.table == 'events':
            cameoDescripts = results.EventCode.apply(codeCams)

            results.insert(27,
                           'CAMEOCodeDescription',
                           value=cameoDescripts.values)

        ###############################################
        # Setting the output options
        ###############################################

        if output == 'df':
            self.final = results
        elif output == 'json':
            self.final = results.to_json(orient='records')
        elif output == 'csv':
            self.final = results.to_csv(encoding='utf-8')
        elif output == 'gpd' or output == 'geodataframe' or output == 'geoframe':
            self.final = geofilter(results)
            self.final = self.final[self.final.geometry.notnull()]
        elif output == 'r':

            if self.coverage:
                coverage = 'True'
            else:
                coverage = 'False'
            if isinstance(self.date, list):

                formattedDates = [
                    "".join(re.split(' |-|;|:', l)) for l in self.date
                ]
                path = formattedDates
                outPath = (
                    "gdeltVersion_" + str(self.version) + "_coverage_" +
                    coverage + "_" + "_table_" + self.table + '_queryDates_' +
                    "_".join(path) + "_queryTime_" +
                    datetime.datetime.now().strftime('%m-%d-%YT%H%M%S') +
                    ".feather")
            else:
                outPath = (
                    "gdeltVersion_" + str(self.version) + "_coverage_" +
                    coverage + "_" + "_table_" + self.table + '_queryDates_' +
                    "".join(re.split(' |-|;|:', self.date)) + "_queryTime_" +
                    datetime.datetime.now().strftime('%m-%d-%YT%H%M%S') +
                    ".feather")

            if normcols:
                results.columns = list(
                    map(lambda x: (x.replace('_', "")).lower(),
                        results.columns))

            feather.api.write_dataframe(results, outPath)
            return results

        else:
            self.final = results

        #########################
        # Return the result
        #########################
        if normcols:
            self.final.columns = list(
                map(lambda x: (x.replace('_', "")).lower(),
                    self.final.columns))

        return self.final
Example #9
    def Search(
        self,
        date,
        table='events',
        headers=None,
        coverage=None,
        queryTime=datetime.datetime.now().strftime('%m-%d-%Y %H:%M:%S')):
        """Placeholder text"""
        dateInputCheck(date, self.version)
        self.coverage = coverage
        self.date = date
        version = self.version
        baseUrl = self.baseUrl
        self.table = table
        self.datesString = gdeltRangeString(dateRanger(self.date),
                                            version=version,
                                            coverage=self.coverage)

        ##################################
        # Partial Functions
        #################################

        v1RangerCoverage = partial(gdeltRangeString, version=1, coverage=True)
        v2RangerCoverage = partial(gdeltRangeString, version=2, coverage=True)
        v1RangerNoCoverage = partial(gdeltRangeString,
                                     version=1,
                                     coverage=False)
        v2RangerNoCoverage = partial(gdeltRangeString,
                                     version=2,
                                     coverage=False)

        urlsv1gkg = partial(urlBuilder, version=1, table='gkg')
        urlsv2mentions = partial(urlBuilder, version=2, table='mentions')
        urlsv2events = partial(urlBuilder, version=2, table='events')
        urlsv1events = partial(urlBuilder, version=1, table='events')
        urlsv2gkg = partial(urlBuilder, version=2, table='gkg')

        eventWork = partial(mp_worker, table='events')
        codeCams = partial(cameos, codes=codes)

        #####################################
        # GDELT Version 2.0 Headers
        #####################################

        if int(self.version) == 2:
            ###################################
            # Download 2.0 Headers
            ###################################

            self.events_columns = events2Heads()
            self.mentions_columns = mentionsHeads()
            self.gkg_columns = gkgHeads()

        #####################################
        # GDELT Version 1.0 Analytics, Header, Downloads
        #####################################

        if int(self.version) == 1:

            if self.table is "mentions":
                raise BaseException('GDELT 1.0 does not have the "mentions'
                                    ' table. Specify the "events" or "gkg"'
                                    'table.')
            else:
                pass

            self.events_columns = events1Heads()
            columns = self.events_columns

            if self.table == 'gkg':
                self.download_list = (urlsv1gkg(
                    v1RangerCoverage(dateRanger(self.date))))

            elif self.table == 'events' or self.table == '':

                if self.coverage is True:

                    self.download_list = (urlsv1events(
                        v1RangerCoverage(dateRanger(self.date))))

                else:
                    # print("I'm here at line 125")
                    self.download_list = (urlsv1events(
                        v1RangerNoCoverage(dateRanger(self.date))))
            else:
                raise Exception('You entered an incorrect table type for '
                                'GDELT 1.0.')

        #####################################
        # GDELT Version 2.0 Analytics and Download
        #####################################
        elif int(self.version) == 2:

            if self.table == 'events' or self.table == '':
                columns = self.events_columns
                if self.coverage is True:

                    self.download_list = (urlsv2events(
                        v2RangerCoverage(dateRanger(self.date))))
                else:
                    self.download_list = (urlsv2events(
                        v2RangerNoCoverage(dateRanger(self.date))))

            if self.table == 'gkg':
                columns = self.gkg_columns
                if self.coverage is True:

                    self.download_list = (urlsv2gkg(
                        v2RangerCoverage(dateRanger(self.date))))
                else:
                    self.download_list = (urlsv2gkg(
                        v2RangerNoCoverage(dateRanger(self.date))))
                    # print ("2 gkg", urlsv2gkg(self.datesString))

            if self.table == 'mentions':
                columns = self.mentions_columns
                if self.coverage is True:

                    self.download_list = (urlsv2mentions(
                        v2RangerCoverage(dateRanger(self.date))))

                else:

                    self.download_list = (urlsv2mentions(
                        v2RangerNoCoverage(dateRanger(self.date))))

        #########################
        # DEBUG Print
        #########################
        # print (self.version, self.table, self.coverage, self.datesString,
        #
        # print (self.download_list)

        #########################
        # Download section
        #########################

        if isinstance(self.datesString, str):

            if self.table == 'events':

                results = eventWork(self.download_list)
            else:
                results = mp_worker(self.download_list)
        else:

            if self.table == 'events':
                pool = Pool(processes=cpu_count())
                downloaded_dfs = list(
                    pool.imap_unordered(eventWork, self.download_list))
            else:

                pool = Pool(processes=cpu_count())
                downloaded_dfs = list(
                    pool.imap_unordered(mp_worker, self.download_list))
            pool.close()
            pool.terminate()
            pool.join()
            results = pd.concat(downloaded_dfs)
            del downloaded_dfs
            results.reset_index(drop=True, inplace=True)

        if self.table == 'gkg' and int(self.version) == 1:
            results.columns = results.iloc[0].values.tolist()
            results.drop([0], inplace=True)

        else:
            results.columns = columns

        if (len(results)) == 0:
            raise ValueError("This GDELT query returned no data. Check "
                             "internet connection or query parameters and "
                             "retry")

        # Add column of human readable codes; need updated CAMEO
        if self.table == 'events':
            cameoDescripts = results.EventCode.apply(codeCams)

            results.insert(27,
                           'CAMEOCodeDescription',
                           value=cameoDescripts.values)

        self.final = results

        #########################
        # Return the result
        #########################
        return self.final
Example #10
 def test_gdeltrange_sequence_v1_2005(self):
     date_sequence = ['2001 Feb 01', '2005 Feb 05']
     ranger_output = dateRanger(date_sequence)
     gdeltstring_test = np.sort(np.array(gdeltRangeString(ranger_output, version=1)))
     exp = np.sort(np.array(['2001', '2002', '2003', '2004', '2005']))
     np.testing.assert_array_equal(exp, gdeltstring_test)