Code Example #1
File: wiki_scraper.py Project: Julialiuu/KEX
def wiki_scraper(workbook, sheet1, object_list):
    iteration = 0
    for element in object_list:
        try:
            #print('1')
            #print(element.titel1)
            #print(element.lastdate)
            #print(element.firstdate)
            titel1 = element.titel1
            titel2 = element.titel2
            titel3 = element.titel3
            print(titel2, titel3)
            """
			views = pageviewapi.per_article('sv.wikipedia', element.title1, element.firstdate, element.lastdate,
                        	access='all-access', agent='all-agents', granularity='daily')
			"""
            views = pageviewapi.per_article('en.wikipedia',
                                            element.titel1,
                                            element.lastdate,
                                            element.firstdate,
                                            access='all-access',
                                            agent='all-agents',
                                            granularity='daily')
            views = select_views(views)
        #print(1, views)
        except Exception:
            try:
                print(2)
                print(titel2)
                views = pageviewapi.per_article('en.wikipedia',
                                                element.titel2,
                                                element.lastdate,
                                                element.firstdate,
                                                access='all-access',
                                                agent='all-agents',
                                                granularity='daily')
                views = select_views(views)
            except Exception:
                try:
                    print(3)
                    print(element.titel3)
                    views = pageviewapi.per_article('en.wikipedia',
                                                    element.titel3,
                                                    element.lastdate,
                                                    element.firstdate,
                                                    access='all-access',
                                                    agent='all-agents',
                                                    granularity='daily')
                    views = select_views(views)
                except Exception:
                    views = 'N/A'
        write_to_file(sheet1, views, iteration, element)
        iteration = iteration + 1
    workbook.save('Wiki_workbook.xls')
    print('hola!')
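The three nested try/except blocks above repeat the same per_article call with a different title attribute. Below is a minimal sketch of that fallback written as a loop; the helper name views_with_fallback is hypothetical, it assumes the same element attributes (titel1, titel2, titel3, firstdate, lastdate), catches only the library's no-data exception, and keeps the original date-argument order even though per_article takes the start date before the end date.

import pageviewapi
from pageviewapi.client import ZeroOrDataNotLoadedException


def views_with_fallback(element):
    """Try each candidate title in turn; return 'N/A' if none resolves."""
    for title in (element.titel1, element.titel2, element.titel3):
        try:
            return pageviewapi.per_article('en.wikipedia', title,
                                           element.lastdate, element.firstdate,
                                           access='all-access',
                                           agent='all-agents',
                                           granularity='daily')
        except ZeroOrDataNotLoadedException:
            continue  # fall through to the next candidate title
    return 'N/A'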
Code Example #2
File: WikiData.py Project: Sommerli/stock-prediction
def get_daily_wiki_data(
    TICKER,
    start='2017-01-01',
    end=date.today().strftime(
        "%Y-%m-%d")):  # Gjer "end" funksjonen at vi ikkje får siste dagen med?
    """Function that takes ticker, and name of firm as input and retrieves relevant data as pandas dataframe"""
    Name = TICKER
    out_df = pd.DataFrame()

    # ### wikipedia data
    wikistart = start.replace('-', '')
    wikiend = end.replace("-", "")
    try:
        wikidata = wik.per_article('en.wikipedia',
                                   Name,
                                   wikistart,
                                   wikiend,
                                   access='all-access',
                                   agent='all-agents',
                                   granularity='daily')
        df_wiki = pd.DataFrame(wikidata['items'])
        df_wiki['Date'] = pd.to_datetime(df_wiki['timestamp'],
                                         format='%Y%m%d%H')
        df_wiki.set_index('Date', inplace=True)
        out_df['wikiviews'] = df_wiki['views']
    except Exception:
        print('did not work')

    # Drop NaN values; at least 14 rows are dropped because Williams %R is
    # computed over a 14-day rolling window and is NaN for the first days.
    return out_df.dropna()
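The comment in the signature above asks whether the end date is left out. A small sketch to check this empirically (the article name is chosen arbitrarily): request a two-day window and print the timestamps that actually come back.

import pageviewapi

res = pageviewapi.per_article('en.wikipedia', 'Python_(programming_language)',
                              '20200101', '20200102',
                              access='all-access', agent='all-agents',
                              granularity='daily')
# If both boundaries are inclusive, this prints ['2020010100', '2020010200'].
print([item['timestamp'] for item in res['items']])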
Code Example #3
def fillMissingValues(dataframe):
    pageTitle, accessDetails = extractTitleAndAccessDetails(dataframe)
    dataIndex = pd.concat([pageTitle, accessDetails], join='outer', axis=1)
    beforeFilling = dataframe.isnull().sum()
    dates = list(dataframe.columns)
    dates = dates[1:]
    for date in dates:
        nullValues = dataframe[date].isnull().to_numpy().nonzero()
        for index in nullValues[0]:
            try:
                dataframe.at[dataIndex['Page Title'][index],
                             date] = pageviewapi.per_article(
                                 dataIndex['Domain'][index],
                                 dataIndex['Page Title'][index],
                                 date.replace('-', ''),
                                 date.replace('-', ''),
                                 dataIndex['Access'][index],
                                 dataIndex['Viewer Type'][index],
                                 granularity='daily')['items'][0]['views']
                print("Filled", dataIndex['Page Title'][index], date)
            except pageviewapi.client.ZeroOrDataNotLoadedException:
                print("No data available.")
                continue
    afterFilling = dataframe.isnull().sum()
    print('Before filling the missing values: ', beforeFilling)
    print('After filling the missing values: ', afterFilling)
    return dataframe
Code Example #4
def get_number_of_pageviews(article_name):
    pageview_data = pageviewapi.per_article(
        'fr.wikipedia', article_name, start='20000101', end='20201020', granularity='monthly')
    pageview_count = 0
    for item in pageview_data['items']:
        pageview_count += item['views']

    return pageview_count
Code Example #5
def get_pageviews(start_date, end_date, page):
    views = 0
    try:
        out = pageviewapi.per_article(page.site.code + '.wikipedia', page.title(), start_date, end_date,
                                      access='all-access', agent='all-agents', granularity='daily')
        for i in range(len(out["items"])):
            views += out["items"][i]["views"]
        return views
    except pageviewapi.client.ZeroOrDataNotLoadedException:
        return 0
Code Example #6
def get_page_views(start_date, end_date, page_name, granularity):
    """Get number of views of a page for a specific food using PageView API from Wikipedia."""
    page_views = pv.per_article(page=page_name,
                                project='en.wikipedia',
                                start=start_date,
                                end=end_date,
                                granularity=granularity).get('items')
    views = 0
    for i in range(len(page_views)):
        views += page_views[i].get('views')
    return page_name, views
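For reference, the index-based summation in this example (and in the next one) can be written as a single generator expression over the returned items; a minimal equivalent, assuming the same page_views list:

views = sum(item.get('views', 0) for item in page_views)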
Code Example #7
def get_popularity_score(page_name):
    """Get number of views of a page for a specific food using PageView API from Wikipedia."""
    today = datetime.date.today().strftime('%Y%m%d')
    last_year_today = (datetime.date.today() -
                       datetime.timedelta(days=365)).strftime('%Y%m%d')
    page_views = pv.per_article(page=page_name,
                                project='en.wikipedia',
                                start=last_year_today,
                                end=today,
                                granularity='monthly').get('items')
    views = 0
    for i in range(len(page_views)):
        views += page_views[i].get('views')
    return views
Code Example #8
def parse(row):
    days_before = 15
    days_after = 15
    day_idx = np.arange(days_before + days_after)
    print(row[0])
#    if row[0] % 100 == 0:
#        time.sleep(1)
    index = row[0]
    row = row[1]
    topic = row[0]
    #topic = topic.replace('#', '')
    # print(topic)

    start_date = (row[1] - pd.Timedelta(days=days_before)).strftime('%Y%m%d')
    end_date = (row[1] + pd.Timedelta(days=days_after)).strftime('%Y%m%d')
    # print(start_date, end_date)

    try:
        suggestion = wiki.search(topic)
        if len(suggestion) > 0:
            #print('Interpreting topic ' + topic + ' as ' + suggestion[0])
            interpreted_topic = suggestion[0]
            #topic_views = pgviews.per_article('en.wikipedia', topic, start_date, end_date, agent='user', access='all-access', granularity='daily')
        else:
            interpreted_topic = topic
            return [index, topic] + [interpreted_topic]+[row[1]]+list(np.full(days_before+days_after+1, np.nan))

        topic_views = pgviews.per_article('en.wikipedia', interpreted_topic, start_date, end_date, agent='user', access='all-access', granularity='daily')['items']
        # print(topic_views, '--------------\n')

        if len(topic_views) < 16:
            return [index, topic] + [interpreted_topic]+[row[1]]+list(np.full(days_before+days_after+1, np.nan))

    except Exception:
        interpreted_topic = topic
        return [index, topic] + [interpreted_topic]+[row[1]]+list(np.full(days_before+days_after+1, np.nan))


    day_list = topic_views
    views_list = np.array(list(map(lambda i: i['views'], day_list)))

    #print(views_list)
    views_list = interpolate_zeros(views_list)

    pct_change = np.diff(views_list) / views_list[0:-1] * 100
    if np.max(pct_change[13:16]) < 10:
        return [index, topic] + [interpreted_topic]+[row[1]]+list(np.full(days_before+days_after+1, np.nan))

    return [index, topic] + [interpreted_topic] + [row[1]] + list(views_list) + [np.mean(views_list)]
Code Example #9
def views(article_name, start_date, end_date):
    '''Input: a wiki article name, and the desired date range for daily pageview data to be collected
       Output: a list containing pageviews of the article for specified range
    '''
    page_views = []
    d = pageviewapi.per_article('en.wikipedia',
                                article_name,
                                start_date,
                                end_date,
                                access='all-access',
                                agent='all-agents',
                                granularity='daily')
    for i in range(len(d['items'])):
        page_views.append(d['items'][i]['views'])
    return page_views
Code Example #10
def getPageviews(articleName, lang):
    '''
    Get pageviews for given article and language version of Wikipedia, convert to
    dataframe.
    ---
    Parameters: article name, language abbreviated ('en', 'hi', 'ur', ...)
    Returns: dataframe. 1 row per day; columns are project, article, views, etc.
    ---
    '''
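    # NOTE: start and end are not parameters of this function; they are assumed
    # to be module-level date strings in YYYYMMDD format.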
    dailyPageViewsDict = pageviewapi.per_article(lang + '.wikipedia',
                                                 articleName,
                                                 start,
                                                 end,
                                                 agent='all-agents',
                                                 granularity='daily')
    return pd.DataFrame.from_dict(dailyPageViewsDict['items'])
Code Example #11
File: wikipedia_trends.py Project: lsmidt/hedge
    def get_wiki_views(self, article: str, start_date, end_date):
        """
        return dict of datetime : views for the period between start and end date (both inclusive)
        """

        _st = self._to_wiki_date_string(start_date)
        _nd = self._to_wiki_date_string(end_date)

        try:
            full_result = pageviewapi.per_article('en.wikipedia', article, _st, _nd,\
                            access='all-access', agent='all-agents', granularity='daily')
        except pageviewapi.client.ZeroOrDataNotLoadedException as e:
            full_result = dict(items=[])

        result_subset = {}

        for item in full_result["items"]:
            py_dt = self._to_datetime(item["timestamp"])
            result_subset[py_dt] = item["views"]

        return result_subset
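get_wiki_views returns a plain dict keyed by datetime. If a pandas Series is more convenient downstream, here is a self-contained sketch along the same lines; the helper name wiki_views_series is hypothetical, and start/end are YYYYMMDD strings as elsewhere in these examples.

import pandas as pd
import pageviewapi


def wiki_views_series(article, start, end):
    """Daily views for an article as a pandas Series keyed by the API's
    timestamp strings (hypothetical helper, en.wikipedia only)."""
    res = pageviewapi.per_article('en.wikipedia', article, start, end,
                                  access='all-access', agent='all-agents',
                                  granularity='daily')
    return pd.Series({item['timestamp']: item['views'] for item in res['items']})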
Code Example #12
File: util.py Project: shogun-toolbox/applications
    def get_pageviews(self, country, current_date):
        logging.info('getting pageviews for ' + country)
        project = config.PREFIX[config.LANGUAGE[country]] + '.wikipedia'
        filepath = config.keywords_path / (country + '.txt')

        current_date = current_date - timedelta(days=1)
        end = current_date.strftime('%Y%m%d')
        start_date = current_date - timedelta(days=6)
        start = start_date.strftime('%Y%m%d')

        features = pd.Series()
        logging.info('from ' + str(start) + ' to ' + str(end))
        with open(str(filepath.absolute()), 'r') as file:
            for line in file:
                line = line[:-1]
                count = 0
                try:
                    res = pageviewapi.per_article(project,
                                                  line.strip(),
                                                  start,
                                                  end,
                                                  access='all-access',
                                                  agent='all-agents',
                                                  granularity='daily')

                    for item in res['items']:
                        count += int(item['views'])
                except ZeroOrDataNotLoadedException:
                    logging.info(
                        'ZeroOrDataNotLoadedException returned, saving '
                        'pageviews as 0.')
                    count = 0
                except ThrottlingException:
                    logging.info(
                        'ThrottlingException returned, saving pageviews as 0.')
                    count = 0
                features[line] = count
                logging.info('\tpageviews for ' + line + ' are ' + str(count))

        return features
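Instead of recording 0 when the API throttles, the request can be retried with a short back-off. A sketch under the assumption that ThrottlingException and ZeroOrDataNotLoadedException are importable from pageviewapi.client, as the snippet above suggests; the helper name per_article_with_retry is hypothetical.

import time

import pageviewapi
from pageviewapi.client import ThrottlingException, ZeroOrDataNotLoadedException


def per_article_with_retry(project, page, start, end, retries=3, wait=5):
    """Retry throttled requests a few times before giving up."""
    for attempt in range(retries):
        try:
            return pageviewapi.per_article(project, page, start, end,
                                           access='all-access',
                                           agent='all-agents',
                                           granularity='daily')
        except ZeroOrDataNotLoadedException:
            return {'items': []}  # genuinely no data for this page/range
        except ThrottlingException:
            time.sleep(wait * (attempt + 1))  # back off, then try again
    return {'items': []}  # still throttled after all retries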
Code Example #13
def fetchwikiData(dataset, dataframe, path, startDate, endDate, granularity):
  print("Checking for Duplicates")
  dropDuplicates(dataset, path)
  count = 0
  fileCount = 0
  prefetchedFiles = os.listdir(path)
  if len(prefetchedFiles) != 0:
    fileCount = len(prefetchedFiles) 
    prefetchedFiles = pd.DataFrame(prefetchedFiles, columns = ['Page'])
    prefetchedFiles1 = prefetchedFiles.merge(dataset['Page'])
    df = prefetchedFiles.set_index('Page').drop(prefetchedFiles1['Page']).reset_index()
    for _ in list(df['Page']):
      os.remove(os.path.join(path, str(_)))
    dataset = dataset.set_index('Page').drop(prefetchedFiles1['Page']).reset_index()
    dataframe = extractTitleAndAccessDetails(dataset)
    dataframe['Domain'] = dataframe['Domain'].str.replace('.org', '')
  print("Beginning Download.\n")
  for domain, pageTitle, accessDetails, viewerType in zip(dataframe['Domain'], dataframe['Page'], dataframe['Access'], dataframe['Viewer Type']):
    try:
      jsonResults = pageviewapi.per_article(domain, pageTitle.replace('%0%0%0', '/'), startDate, endDate, accessDetails, viewerType, granularity)
      try:
        writeJSONFile(path, pageTitle, domain, accessDetails, viewerType, jsonResults)
        fileCount = fileCount + 1
        print("Saved", jsonResults['items'][0]['article'], fileCount)
      except OSError:
        fileCount, count = errorHandling(fileCount, count)
        continue
    except pageviewapi.client.ZeroOrDataNotLoadedException:
      try:
        with open(path + "/" + str(pageTitle) + "_" + domain + ".org" + "_" + accessDetails + "_" + viewerType,  "w") as write_file:
          json.dump('', write_file)
      except OSError:
        fileCount, count = errorHandling(fileCount, count)
        continue
      fileCount, count = errorHandling(fileCount, count)
      continue
    
  print("Download Completed.\n")
Code Example #14
def fetch_timeseries_wikipedia(keyword, save_csv=True):
    try:
        interest_over_time = pageviewapi.per_article('en.wikipedia',
                                                     keyword,
                                                     '20151101',
                                                     '20191101',
                                                     access='all-access',
                                                     agent='all-agents',
                                                     granularity='daily')
    except Exception:
        print('The chosen article doesn\'t exist')
        return None

    # Save in a csv if needed
    if save_csv:
        # Csv naming and path
        data_path = 'data/wikipedia/'
        if not os.path.exists(data_path): os.makedirs(data_path)
        file_name = data_path + keyword.lower() + '_wikipedia_interest.csv'
        interest_over_time_df = pd.DataFrame(interest_over_time)
        interest_over_time_df.to_csv(file_name, index=False, encoding='utf-8')

    return interest_over_time
Code Example #15
def average_page_view(entity):
    """
    Takes the table and returns the average page views of the Wikipedia page over a given time interval
    :param table:
    :return:
    """
    start_date = '20200101'  # January 1 2020
    end_date = '20200326'  # March 26 2020
    n_of_page_views = 0
    try:
        page_views = pageviewapi.per_article('en.wikipedia',
                                             entity,
                                             start_date,
                                             end_date,
                                             access='all-access',
                                             agent='all-agents',
                                             granularity='daily')
        for article in page_views['items']:
            n_of_page_views += article['views']
    except Exception:
        n_of_page_views = 0

    return n_of_page_views / 86  # 86 days from 2020-01-01 to 2020-03-26, inclusive
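A quick check of the hard-coded divisor: the number of days from 2020-01-01 to 2020-03-26 with both endpoints counted is indeed 86.

from datetime import date

n_days = (date(2020, 3, 26) - date(2020, 1, 1)).days + 1  # both endpoints counted
print(n_days)  # 86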
Code Example #16
import csv
import pageviewapi


with open('party_name.csv','r',encoding='utf-8') as inp, open('qwer.csv','w') as out:
    writer=csv.writer(out)
    s=0
    for row in csv.reader(inp):
        if row:
            try:
                s += 1
                v = pageviewapi.per_article('en.wikipedia', row[0], '20151106', '20191120', access='all-access', agent='all-agents', granularity='daily')
                for i in v['items']:
                    
                    list1=[]
                    list1.append(row[0])
                    list1.append(i['timestamp'])
                    list1.append(i['views'])
                    
                    writer.writerow(list1)
                    print(list1)
            except Exception as e:
                print(e)
print("done")
                
    
Code Example #17
def get_wiki_pageviews(twitter_file: str, wiki_file: str):
    """
    Get Wikipedia pageviews for each Twitter trend and save them in a csv
    :param twitter_file: path to csv with Twitter trends
    :param wiki_file: path to output csv where Wiki pageviews will be written
    """
    timeStamps = pd.read_csv(twitter_file, header=0, index_col=0)
    ts_per_article = pd.DataFrame(columns=[
        'trend', 't-15', 't-14', 't-13', 't-12', 't-11', 't-10', 't-9', 't-8',
        't-7', 't-6', 't-5', 't-4', 't-3', 't-2', 't-1', 't'
    ])

    for index, row in timeStamps.iterrows():
        days_before = 15
        # topic = row[2]
        topic = row.name
        start_date = (datetime.today() -
                      timedelta(days=days_before + 1)).strftime('%Y%m%d')
        end_date = datetime.today().strftime('%Y%m%d')

        try:
            suggestion = wiki.search(topic)
            if len(suggestion) > 0:
                interpreted_topic = suggestion[0]
            else:
                interpreted_topic = 'NA'

            # NOTE: this overrides the wiki.search suggestion with the raw trend name
            interpreted_topic = topic
            if interpreted_topic != 'NA':
                topic_views = pgviews.per_article('en.wikipedia',
                                                  interpreted_topic,
                                                  start_date,
                                                  end_date,
                                                  agent='user',
                                                  access='all-access',
                                                  granularity='daily')['items']
                day_list = topic_views
                views_list = np.array(list(map(lambda i: i['views'],
                                               day_list)))
                views_list = interpolate_zeros(views_list)
                views_list = np.array(views_list).astype(int)

                ts_per_article = ts_per_article.append(
                    {
                        'trend': interpreted_topic,
                        't-15': views_list[0],
                        't-14': views_list[1],
                        't-13': views_list[2],
                        't-12': views_list[3],
                        't-11': views_list[4],
                        't-10': views_list[5],
                        't-9': views_list[6],
                        't-8': views_list[7],
                        't-7': views_list[8],
                        't-6': views_list[9],
                        't-5': views_list[10],
                        't-4': views_list[11],
                        't-3': views_list[12],
                        't-2': views_list[13],
                        't-1': views_list[14],
                        't': views_list[15]
                    },
                    ignore_index=True)

        except Exception:
            interpreted_topic = topic

    ts_per_article[[
        't-15', 't-14', 't-13', 't-12', 't-11', 't-10', 't-9', 't-8', 't-7',
        't-6', 't-5', 't-4', 't-3', 't-2', 't-1', 't'
    ]] = ts_per_article[[
        't-15', 't-14', 't-13', 't-12', 't-11', 't-10', 't-9', 't-8', 't-7',
        't-6', 't-5', 't-4', 't-3', 't-2', 't-1', 't'
    ]].astype(int)
    ts_per_article.to_csv(wiki_file,
                          index=False,
                          header=True,
                          encoding='utf-8')
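Note that DataFrame.append, used inside the loop above, was removed in pandas 2.0. A minimal sketch of the usual replacement, collecting row dicts in a list and building the frame once (column names as in the code above):

import pandas as pd

columns = ['trend', 't-15', 't-14', 't-13', 't-12', 't-11', 't-10', 't-9',
           't-8', 't-7', 't-6', 't-5', 't-4', 't-3', 't-2', 't-1', 't']
rows = []
# Inside the loop, instead of ts_per_article.append({...}, ignore_index=True):
#     rows.append({'trend': interpreted_topic, 't-15': views_list[0], ...})
ts_per_article = pd.DataFrame(rows, columns=columns)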
Code Example #18
def getSum(project,startdate,enddate,limit=1000, thumbsize=1000):

	#define stopwords
	stopwords = ['Progetto:','Pagina_principale','Wikipedia:','Aiuto:','Speciale:','Special:','File:','Categoria:','load.php']
	#add the custom ones
	#stopwords = stopwords + w_stopwords

	print stopwords
	#set up the maxvalue var

	maxvalue = 0

	data = dict()

	for date in daterange(startdate,enddate):
		print date.strftime("%Y-%m-%d")
		try:
			results = pageviewapi.top(project, date.year, date.strftime('%m'), date.strftime('%d'), access='all-access')
			#print json.dumps(results, indent=4, sort_keys=True)
			for item in results['items'][0]['articles']:
				if item['article'] in data:
					data[item['article']] += item['views']
				else:
					data[item['article']] = item['views']
		except:
			print('impossible to fetch ', date.strftime("%Y-%m-%d"))

	data = sorted(data.items(), key=operator.itemgetter(1), reverse=True)

	articles = []
	#create an object for each article
	rank = 1
	for elm in data:
		#check stopwords
		stop = False
		for stopword in stopwords:
			if stopword in elm[0]:
				stop = True
				print 'stopped '+ elm[0]
				break
		if elm[0] in w_stopwords:
			stop = True
			found_stopwords.append(elm[0].replace('_',' '))
			print '\tfound custom sw: '+elm[0]
		if not stop:
			obj = {}
			obj['title'] = elm[0]
			obj['pageviews'] = elm[1]
			obj['rank'] = rank
			articles.append(obj)
			rank = rank + 1

	#add imgs and snippet
	for article in articles[:limit]:
		article['image'] = getImage('it.wikipedia', article['title'], thumbsize)
		article['snippet'] = getSnippet(project, article['title'])

	#add pageviews
	for article in articles[:limit]:
		print 'loading stats for', article['title'], ' from ', startdate.strftime('%Y%m%d'), ' to ', enddate.strftime('%Y%m%d')
		raw_stats = pageviewapi.per_article(project, urllib.quote(article['title'].encode('utf8')), startdate.strftime('%Y%m%d'), enddate.strftime('%Y%m%d'), access='all-access', agent='all-agents', granularity='daily')
		stats = []
		#parse raw stats
		#for now it is optimized for the vega code, quite messy.
		stats.append({})
		stats[0]['name'] = 'table'
		stats[0]['values'] = []
		#print json.dumps(raw_stats, indent=4, sort_keys=True) # check from here error of 6 output
		for item in raw_stats['items']:
			item_result = {}
			item_result['x'] = datetime.strptime(item['timestamp'], "%Y%m%d%H").strftime("%m/%d/%Y")
			item_result['y'] = item['views']
			if int(item['views']) > maxvalue:
				maxvalue = int(item['views'])

			stats[0]['values'].append(item_result)

		print json.dumps(stats, indent=4, sort_keys=True) # check from here error of 6 output
		article['stats'] = stats

	results = {}
	results['maxvalue'] = maxvalue
	results['project'] = project
	results['startdate'] = startdate.strftime("%Y-%m-%d")
	results['enddate'] = enddate.strftime("%Y-%m-%d")
	results['articles'] = articles[:limit]

	return results
Code Example #19
"""
import pageviewapi
tot=pageviewapi.per_article('sv.wikipedia', 'Lady Bird', '20180211', '20180212',
                        access='all-access', agent='all-agents', granularity='daily')


import pageviewapi.period
pageviewapi.period.sum_last('sv.wikipedia', 'Paris', last=30,
                            access='all-access', agent='all-agents')

tot=pageviewapi.period.avg_last('sv.wikipedia', 'Paris', last=30)
"""
import pageviewapi
tot=pageviewapi.per_article('sv.wikipedia', 'Baywatch', '20170602', '20170702',
                        access='all-access', agent='all-agents', granularity='daily')

print(tot['items'][1]['views'])

"""
AttrDict({u'items': [{u'access': u'all-access', u'views': 13, u'timestamp':
u'2017120600', u'agent': u'all-agents', u'project': u'sv.wikipedia',
u'granularity': u'daily', u'article': u'The_Ring_(film)'},
{u'access': u'all-access', u'views': 17, u'timestamp': u'2017120700',
u'agent': u'all-agents', u'project': u'sv.wikipedia', u'granularity': u'daily',
u'article': u'The_Ring_(film)'}, {u'access': u'all-access', u'views': 30,
u'timestamp': u'2017120800', u'agent': u'all-agents', u'project':
u'sv.wikipedia', u'granularity': u'daily', u'article': u'The_Ring_(film)'},
{u'access': u'all-access', u'views': 227, u'timestamp': u'2017120900',
u'agent': u'all-agents', u'project': u'sv.wikipedia', u'granularity':
u'daily', u'article': u'The_Ring_(film)'}, {u'access': u'all-access',
u'views': 46, u'timestamp': u'2017121000', u'agent': u'all-agents',
Code Example #20
import wikipedia
import pageviewapi
import pymongo
import os
from pymongo import MongoClient


#print (wikipedia.search("Barack"))
#ny = wikipedia.page("New York")
#print(ny.title)

# Data
notice =  pageviewapi.per_article('en.wikipedia', 'Barack', '20151106', '20151120',
                        access='all-access', agent='all-agents', granularity='daily')
#print(notice)

# connect to the DB and create the database
myclient = MongoClient(host=os.environ['MONGO_HOST'], port=int(os.environ['MONGO_PORT']))
db = myclient.wiki

# collection
datos = db.datos

result = datos.insert_one(notice)

print('Inserted object ' + str(result.inserted_id))
Code Example #21
def page_view(read_link, to_link):

    directory = "output"

    # Parent Directory path
    parent_dir = "./test/"

    path = os.path.join(parent_dir, directory)
    os.mkdir(path)

    result = pd.read_csv(read_link)
    result = result.drop(columns=['Unnamed: 0'])
    result['title'] = result['title'].str.replace("_", " ")
    result.columns = [
        'date', 'revert', 'edit', 'commentor', 'title', 'comment'
    ]
    page_view_df = result.groupby(['title']).agg({'date': [np.min]})
    page_view_df.columns = ['min_date']
    page_view_df['title'] = page_view_df.index
    page_view_df['title'] = page_view_df['title'].str.replace("_", " ")
    page_view_df = page_view_df.reset_index(drop=True)
    page_view_df['min_date'] = pd.to_datetime(page_view_df['min_date'])
    page_view_df['min_date'] = page_view_df.min_date.map(
        lambda x: x.strftime('%Y%m%d'))

    dataframe = pd.DataFrame()
    dictionary_other = {}
    for i in np.arange(page_view_df.shape[0]):
        title = page_view_df.iloc[i]['title']
        start_date = page_view_df.iloc[i]['min_date']
        try:
            page_v_dict = pageviewapi.per_article('en.wikipedia',
                                                  title,
                                                  start_date,
                                                  '20210101',
                                                  access='all-access',
                                                  agent='all-agents',
                                                  granularity='daily')
            new_dictionary = {}

            for key in page_v_dict:
                for item in page_v_dict[key]:
                    new_dictionary['title'] = item['article'].replace('_', ' ')
                    new_dictionary['timestamp'] = item['timestamp']
                    # accumulate from 0 so the first day's views are not lost
                    new_dictionary['views'] = new_dictionary.get('views', 0) + item['views']

                new_dataf = pd.DataFrame(new_dictionary, index=[0])
                dataframe = pd.concat([new_dataf, dataframe])
        except Exception:
            dictionary_other[title] = np.nan
            continue
    page_view = dataframe.drop_duplicates()
    page_view.reset_index(drop=True, inplace=True)

    non = pd.DataFrame.from_dict(dictionary_other, orient='index')
    non['title'] = non.index
    non = non.rename(columns={0: 'views'})
    non = non.reset_index(drop=True)
    non['timestamp'] = '2021010100'

    frames = [page_view, non]
    page_view = pd.concat(frames)

    page_view = page_view[['title', 'views']]
    last_dataf = result.merge(page_view, how='left', on='title')

    result = last_dataf.to_csv(to_link, index=False)
    return result