Beispiel #1
0
    def __init__(self, dir_path=None, develop=True):
        """
        Initializes a GTITAN instance rooted at a working directory.

        :param dir_path: working directory. If None, the directory that contains this module is
            used. If it names a path that does not exist yet, the "config" and "data" sub-folders
            are created there and the default "config_py.json" is copied in. If it already
            exists, it is used as-is.
        :param develop: stored unchanged as ``self.develop``; not otherwise used in this method.
        """
        # Directory holding this module; it doubles as the source of default files.
        package_dir = os.path.dirname(os.path.abspath(__file__))
        self.develop = develop

        if dir_path is not None:
            # TODO: make all folders if new folder indicated: copy default files there such as the config.
            self.dir_path = dir_path
            if os.path.exists(dir_path):
                print("Directory already exists, loading data from it.")
            else:
                # Lay out the expected folder structure ...
                for sub_dirs in (("config", ),
                                 ("data", "Translations"),
                                 ("data", "Trend_indices")):
                    os.makedirs(os.path.join(self.dir_path, *sub_dirs))

                # ... and seed it with the packaged default configuration.
                shutil.copyfile(
                    os.path.join(package_dir, "config", "config_py.json"),
                    os.path.join(self.dir_path, "config", "config_py.json"))
        else:
            # Default: work inside the directory of this module.
            self.dir_path = package_dir
            checkpath_dir = os.path.join(self.dir_path, "checkpath")
            if not os.path.exists(checkpath_dir):
                os.makedirs(checkpath_dir)

        print(f"Using directory2 '{self.dir_path}'", __file__)
        config_file = os.path.join(self.dir_path, "config", "config_py.json")
        with open(config_file, 'r') as fp:
            self.CONFIG = json.load(fp)

        # JSON has no tuple type, so the timeout is converted back to a tuple after loading.
        conn_settings = self.CONFIG['CONN']
        conn_settings['timeout'] = tuple(conn_settings['timeout'])

        # Hard-coded settings (NOTE(review): presumably meant to come from the config — confirm).
        self.t_sleep = 0.2
        self.google_timeout = 14 * 60 * 60
        self.t_block = 0

        # Instances of the packages used by this class.
        # NOTE: TrendReq is built with its defaults; self.CONFIG['CONN'] is not passed to it.
        self.translator = google_translator()
        self.pytrend = TrendReq()
        self.gtab = gtab.GTAB()

        # Activate the default project.
        self.set_active_project("frontex")
 def __init__(self, timeframe, geo_code=''):
     """
     Prepare a GTAB object for the given timeframe and region.

     The anchorbank file name is derived from the upper-cased ``geo_code`` and
     ``timeframe``. If no such file is found under
     ``<GTAB_DIR>/output/google_anchorbanks/``, the anchorbank is created first;
     in both cases it is then set as the active one.

     :param timeframe: timeframe string, passed on to the pytrends config.
     :param geo_code: region code, upper-cased before use ('' by default).
     """
     self.timeframe = timeframe
     region = geo_code.upper()
     self.anchorbank = ''.join(
         ('google_anchorbank_geo=', region, '_timeframe=', timeframe, '.tsv'))

     self.t = gtab.GTAB(self.GTAB_DIR)
     self.t.set_options(pytrends_config=dict(geo=region, timeframe=timeframe))

     # Only build the anchorbank when its file is not on disk yet.
     bank_file = os.path.join(self.GTAB_DIR, 'output/google_anchorbanks/',
                              self.anchorbank)
     if not os.path.exists(bank_file):
         print("Creating Anchorbank...")
         self.t.create_anchorbank()

     self.t.set_active_gtab(self.anchorbank)
Beispiel #3
0
def main():
    """Compute a popularity value for every row of 'player.csv' in parallel.

    Reads 'player.csv', calls ``processInput(df, t, i)`` for each row index
    ``i`` on a pool of worker processes, stores the results in the
    'popularity_trends' column and writes the frame to 'player_with_pop.csv'.
    """
    t = gtab.GTAB()
    t.set_active_gtab(
        "google_anchorbank_geo=_timeframe=2019-01-01 2020-08-01.tsv")

    df = pd.read_csv('player.csv')

    # Bind the shared arguments; the pool supplies the row index.
    func = partial(processInput, df, t)

    # The context manager shuts the workers down even if a task raises,
    # so no worker processes are leaked. `map` blocks until all results
    # are in, so leaving the block afterwards is safe.
    # Removing the `processes` argument makes the code run on all available cores.
    with mp.Pool(processes=2) as pool:
        pops = pool.map(func, range(df.shape[0]))

    df['popularity_trends'] = pops
    df.to_csv('player_with_pop.csv', index=False)
Beispiel #4
0
def google_trends(df, d1, d2):
    """Pull movie data from Google Trends and merge it into ``df``.

    A 'google trends' column is set to 0 for all rows. Then, for every unique
    value in ``df['Release']``, a gtab query is run over the timeframe
    ``"<d1> <d2>"`` and each returned 'max_ratio' is written to the rows whose
    'Release' and 'Date' match the movie and the query's index entry.

    The lookup is best-effort: if anything goes wrong for a movie, a message
    is printed and that movie is skipped (its rows keep whatever was written
    before the failure, 0 by default).

    Args:
        df: DataFrame with 'Release' and 'Date' columns; modified in place.
        d1: start of the timeframe (converted with ``str``).
        d2: end of the timeframe (converted with ``str``).

    Returns:
        The same ``df`` object, with the 'google trends' column filled in.
    """
    df.loc[:, 'google trends'] = 0
    # The timeframe is the same for every movie, so it is built only once.
    timeframe = f"{d1!s} {d2!s}"
    for movie in df['Release'].unique():
        try:
            t = gtab.GTAB()
            t.set_options(pytrends_config={"timeframe": timeframe})
            query = t.new_query(movie)
            # The row mask for this movie does not depend on the date.
            is_movie = df['Release'].eq(movie)
            for date in query.index:
                ratio = query.loc[date, 'max_ratio']
                print(ratio)
                df.loc[is_movie & df['Date'].eq(date), 'google trends'] = ratio
        except Exception as exc:
            # Stay best-effort, but do not swallow KeyboardInterrupt/SystemExit
            # and do not hide the reason a movie was skipped.
            print(f"google_trends: skipping {movie!r}: {exc!r}")
            continue
    return df
Beispiel #5
0
def create_and_set_gtab(start_timeframe,
                        end_timeframe,
                        geo,
                        gtab_path="gtab_data"):
    """
    Build a GTAB object configured for the given timeframe and region.

    The anchorbank creation queries Google Trends, so this function is slow
    when the anchorbank has not been created before.
    NOTE(review): the directory at gtab_path is presumably created by
    gtab.GTAB when missing — confirm against the gtab package.

    Args:
    -------
        start_timeframe (str): start of the timeframe for the queries (included)
        end_timeframe (str): end of the timeframe for the queries (included)
        geo (str): geolocation of the search query (two uppercase letters, e.g. US, CH..., or "" for worldwide)
        gtab_path (str): path to already existing data

    Returns:
    -------
        t (GTAB): GoogleTrendsAnchorBank to use for the queries, consistent with the provided options

    """
    anchor_bank = gtab.GTAB(dir_path=gtab_path)

    # gtab expects the timeframe as "<start> <end>".
    timeframe = " ".join((start_timeframe, end_timeframe))
    anchor_bank.set_options(
        pytrends_config=dict(geo=geo, timeframe=timeframe))

    # Called unconditionally; presumably gtab skips the work when the
    # anchorbank already exists — TODO confirm. Otherwise this takes a while
    # because it queries Google Trends.
    anchor_bank.create_anchorbank()

    # Activate the anchorbank that matches the options set above.
    bank_name = f"google_anchorbank_geo={geo}_timeframe={timeframe}.tsv"
    anchor_bank.set_active_gtab(bank_name)

    return anchor_bank
Beispiel #6
0
import pandas as pd
import gtab

# Script: create one anchorbank per country code listed in the CSV file.
df = pd.read_csv("./data/country_codes_filtered.csv")


# this creates a new folder to the gtab!
my_path = "./anchorbanks"
t = gtab.GTAB(dir_path=my_path)

# Walk the 'Code' column in reverse order, building one anchorbank per code.
for i in df.Code.values[::-1]:
    try:
        t.set_options(pytrends_config={"geo": i,
                                       "timeframe": "2019-01-01 2020-12-31"})
        t.create_anchorbank()  # takes a while to run since it queries Google Trends.
    except Exception as exc:
        # Keep going with the next code, but report why this one failed.
        # `Exception` (not a bare except) leaves Ctrl-C able to stop the run.
        print("error", i, repr(exc))
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib
import gtab

# Script: query three keywords with gtab and plot two of them on a log scale.
t = gtab.GTAB()

query_swaziland = t.new_query("swaziland")
query_facebook = t.new_query("Facebook")
query_google = t.new_query("Google")  # queried, but not plotted below

# result1.png

swaziland_color = "#F44336"
facebook_color = "#2196F3"

# Dotted reference curve: the Swaziland ratios divided by 1.52 and rounded up.
# NOTE(review): 1.52 is presumably the calibration factor — confirm.
uncalibrated = np.ceil(query_swaziland.max_ratio.values / 1.52) * 1
plt.plot(query_swaziland.index,
         uncalibrated,
         color=swaziland_color,
         label="Swaziland Before Calibration",
         ls=":")
plt.plot(query_swaziland.max_ratio,
         color=swaziland_color,
         label="Swaziland")

plt.plot(query_facebook.max_ratio, color=facebook_color, label="Facebook")

# Axes, legend and figure layout.
plt.yscale("log")
plt.xlabel("Date")
plt.ylabel("Popularity")
plt.legend()
fig = plt.gcf()
fig.set_size_inches(10, 3.5)
plt.title("gtab-corrected trends")

# Restrict the x axis to the displayed date window.
window_start = pd.to_datetime("2019-09-01")
window_end = pd.to_datetime("2020-08-01")
plt.xlim([window_start, window_end])
plt.show()