Example #1
def shuffle_table(table=None, engine=None, shuffle=''):
    if table is None:
        table = config['TABLES']['QUEUE']
        
    print()
    if shuffle is True:
        x = 'y'
    elif shuffle is False:
        x = 'n'
    else:
        x = input(f'Shuffle {table}? Recommended to prevent IP being banned (y/n): ')
    print()
    
    if x == 'y':

        if engine is None:
            engine = mysql_engine()

        temp = f'{table}_backup_'+datetime.now().strftime('%d_%m')

        tprint(f'[·] Shuffling table {table} (can take up to 5 mins).')

        engine.execute(f'create table {temp} like {table}')
        engine.execute(f'insert into {temp} (original_link) select original_link from {table} order by rand()')
                
        engine.execute(f'drop table {table}')
        engine.execute(f'rename table {temp} to {table}')
        tprint('[+] Done shuffling.')
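The shuffle works by staging a randomly ordered copy of the queue and then swapping it in place of the original table. A minimal, self-contained sketch of the same staging-and-swap idea using the standard-library sqlite3 module (SQLite's RANDOM() stands in for MySQL's RAND(); table name and links are invented for the illustration):

import sqlite3

# Toy in-memory queue table (names and rows are illustrative only).
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE queue (original_link TEXT)')
conn.executemany('INSERT INTO queue VALUES (?)',
                 [(f'https://example.com/{i}',) for i in range(10)])

# Stage a shuffled copy, then swap it in for the original table.
conn.execute('CREATE TABLE queue_backup AS '
             'SELECT original_link FROM queue ORDER BY RANDOM()')
conn.execute('DROP TABLE queue')
conn.execute('ALTER TABLE queue_backup RENAME TO queue')

print([row[0] for row in conn.execute('SELECT original_link FROM queue')])
conn.close()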
Example #2
def get_full_df(n_pools=15,
                n=150,
                queue_table=None,
                processed_table=None,
                delete=False,
                engine=None,
                con=None,
                rand=False):

    if engine is None:
        engine = mysql_engine()
    if queue_table is None:
        queue_table = config['TABLES']['QUEUE']
    if processed_table is None:
        processed_table = config['TABLES']['PROCESSED']

    tprint('[·] Getting chunk...')
    chunk = get_chunk_from_db(n=n,
                              queue_table=queue_table,
                              processed_table=processed_table,
                              delete=delete,
                              engine=engine,
                              con=con,
                              rand=rand)

    tprint('[·] Populating chunk...')
    df = populate_df(chunk, n_pools=n_pools)

    return df
Example #3
def populate_df(df, n_pools=15):
    pd.options.mode.chained_assignment = None

    row_list = list(df.T.to_dict().values())

    p = Pool(n_pools, init_worker)

    try:
        news_dict = p.map_async(process_row, row_list, 15)
        news_dict.wait()

        out = pd.DataFrame(news_dict.get())

        p.close()
        p.join()

        # Set Dummy variables to 0 instead of None

        try:
            out.borrar[out.borrar.isnull()] = 0
        except:
            pass
        return out

    except KeyboardInterrupt:
        print()
        tprint('Interrupted')
        p.terminate()
        p.join()
        sys.exit()
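populate_df delegates the per-row scraping to a process pool whose initializer (init_worker, defined elsewhere in the project) presumably makes the children ignore SIGINT so that Ctrl-C is handled once by the parent. A self-contained sketch of that Pool / map_async / KeyboardInterrupt pattern, with a toy worker standing in for process_row:

import signal
import sys
from multiprocessing import Pool


def init_worker():
    # Assumed behaviour of the project's initializer: children ignore
    # SIGINT so only the parent reacts to Ctrl-C.
    signal.signal(signal.SIGINT, signal.SIG_IGN)


def process_row(row):
    # Stand-in for the real per-row scraping work.
    return {'id': row['id'], 'error': 0}


if __name__ == '__main__':
    row_list = [{'id': i} for i in range(100)]
    p = Pool(4, init_worker)
    try:
        result = p.map_async(process_row, row_list, 10)
        result.wait()
        out = result.get()
        p.close()
        p.join()
        print(f'{len(out)} rows processed')
    except KeyboardInterrupt:
        p.terminate()
        p.join()
        sys.exit(1)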
Example #4
def process_Biobiochile(page):
    d = process_inner(page)
    soup = bs(page.content, 'lxml')
    try:
        d['authors'] = soup.find('div', {'class': 'nota-autor'}).find('a').text
    except Exception as exc:
        tprint('[-] Error parsing authors (Biobiochile) - ',
               exc,
               important=False)
    try:
        d['section'] = ' '.join(
            soup.find('div', {
                'class': 'categoria-titulo-nota'
            }).text.split())
    except Exception as exc:
        tprint('[-] Error parsing section (Biobiochile) - ',
               exc,
               important=False)
    try:
        d['body'] = soup.find('div', {'class': 'nota-body'}).text
        d['body'] = d['body'].replace('Etiquetas de esta nota:', '')
    except Exception as exc:
        tprint('[-] Error parsing body (Biobiochile) - ', exc, important=False)
    # No description field is parsed for this site.
    d['description'] = None

    return d
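Every site-specific parser in this listing follows the same shape: process_inner builds a base dict, then each optional field is extracted inside its own try/except so that one missing element only skips that field instead of failing the whole row. The pattern in isolation, against an inline HTML snippet (class names borrowed from the Biobiochile parser above):

from bs4 import BeautifulSoup as bs

html = '''
<div class="nota-autor"><a>Jane Doe</a></div>
<div class="nota-body">Body text of the article.</div>
'''

soup = bs(html, 'lxml')
d = {}

try:
    d['authors'] = soup.find('div', {'class': 'nota-autor'}).find('a').text
except Exception as exc:
    print('[-] Error parsing authors -', exc)

try:
    # This element is absent from the snippet, so only this field is skipped.
    d['section'] = soup.find('div', {'class': 'categoria-titulo-nota'}).text
except Exception as exc:
    print('[-] Error parsing section -', exc)

print(d)  # {'authors': 'Jane Doe'}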
Example #5
def reinsert_from_error_to_queue(engine=None, con=None, where=''):
    queue_table = config['TABLES']['QUEUE']
    error_table = config['TABLES']['ERROR']
    
    close = False
    close_ = False
    
    if engine is None:
        engine = mysql_engine()
        con = engine.connect()
        close = True

    if con is None:
        con = engine.connect()
        close_ = True
        
    # Where clause
    if where == '':
        where = input('Where clause for mysql query:\n\t- ')
        print()
        
    # Count and confirm
    tprint('[·] Counting links...')
    count = engine.execute(f'select count(*) from {error_table} where {where}').scalar()
    y = input(f'\nAre you sure you want to reinsert {count} links? (y/n): ')
    print()
    
    if y == 'y':
    
        # Get links to be reinserted
        tprint('[·] Getting Links...')
        to_be_reinserted = mysql_query_as_set(f'select original_link from {error_table} where {where};', con=con)
        
        # Reinserting into queue
        tprint('[·] Reinserting into queue table...')
        insert_set(to_be_reinserted, queue_table, 'original_link', engine=engine, con=con)

        # Delete from error
        tprint('[·] Deleting from error table...')
        engine.execute(f'delete from {error_table} where {where}')
        
        count_error = engine.execute(f'select count(*) from {error_table}').scalar()
        tprint(f'[+] Done! {count_error} links left in {error_table} table')

    if close:
        con.close()
        engine.dispose()
    if close_:
        con.close()
Example #6
def update_db(df, backup=None, queue=None, engine=None, con=None, shuffle=False):
    if backup is None: backup = config['TABLES']['BACKUP']
    if queue is None: queue = config['TABLES']['QUEUE']
    if engine is None and con is None: engine = mysql_engine()
    if con is None: con = engine.connect()

    print()
    df.to_sql('erase', con=con, index=False, if_exists='append', chunksize=50000)

    engine.execute(f'insert ignore into {backup} (original_link) select original_link from erase')
    engine.execute(f'insert ignore into {queue} (original_link) select original_link from erase')

    engine.execute('drop table if exists erase')
    
    tprint('Successfully added urls to database.')
    
    shuffle_table(engine=engine, shuffle=shuffle)
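update_db stages the incoming links in a throwaway table and relies on MySQL's INSERT IGNORE (which presumably pairs with a unique key on original_link) to discard duplicates. The same staging idea in a self-contained sketch with sqlite3, whose equivalent is INSERT OR IGNORE (table names and links are illustrative):

import sqlite3

import pandas as pd

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE queue (original_link TEXT PRIMARY KEY)')
conn.execute("INSERT INTO queue VALUES ('https://example.com/old')")

# Stage the new links, merge them ignoring duplicates, drop the staging table.
df = pd.DataFrame({'original_link': ['https://example.com/old',
                                     'https://example.com/new']})
df.to_sql('erase', con=conn, index=False, if_exists='append')

conn.execute('INSERT OR IGNORE INTO queue (original_link) '
             'SELECT original_link FROM erase')
conn.execute('DROP TABLE IF EXISTS erase')

print(conn.execute('SELECT COUNT(*) FROM queue').fetchone()[0])  # 2, not 3
conn.close()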
Example #7
def process_Emol(page):
    d = process_inner(page)
    soup = bs(page.content, 'lxml')
    try:
        d['section'] = d['link'].split('/')[4].capitalize()
    except Exception as exc:
        tprint('[-] Error parsing section (Emol) - ', exc, important=False)

    try:
        d['authors'] = soup.find('div', {
            'class': 'info-notaemol-porfecha'
        }).text.split('|')[-1].strip().replace('Por ', '').replace(
            'Redactado por ', '')
    except Exception as exc:
        tprint('[-] Error parsing authors (Emol) - ', exc, important=False)

    return d
Example #8
def get_chunk_from_db(n=150,
                      queue_table=None,
                      processed_table=None,
                      delete=False,
                      engine=None,
                      con=None,
                      rand=False):

    # Create the engine lazily instead of at import time.
    if engine is None:
        engine = mysql_engine()
    if queue_table is None:
        queue_table = config['TABLES']['QUEUE']
    if processed_table is None:
        processed_table = config['TABLES']['PROCESSED']

    if con is None:
        con = engine.connect()

    order = 'order by id'
    if rand:
        order = 'order by rand()'

    query = f'select original_link from {queue_table} {order} limit {n}'

    try:
        # Reading rows
        df = pd.read_sql(query, con)

        # Backup and delete rows
        if delete:
            temp_table = temp_name(engine)
            df.to_sql(temp_table, con=con, if_exists='append', index=False)

            insert_query = f'INSERT IGNORE INTO {processed_table} (original_link) SELECT original_link FROM {temp_table}'

            engine.execute(insert_query)
            engine.execute(f'delete from {queue_table} limit {n}')
            engine.execute(f'drop table if exists {temp_table}')

    except Exception as exc:
        tprint('[-] Error en get_chunk_from_db()', exc)
        df = None

    return df
Example #9
def process_Df(page):
    cookies = read_cookies()
    page = requests.get(page.url, cookies=cookies['df'])

    if '¡Página no encontrada!' in page.text:
        try:
            tprint('[·] df.cl page not found. Searching for title...',
                   important=False)

            title_ = page.url.split('/')[3].replace('-', '+')
            search_ = f'https://www.df.cl/cgi-bin/prontus_search.cgi?search_texto="{title_}"&search_prontus=noticias&search_tmp=search.html&search_idx=ALL&search_modo=and&search_form=yes'

            page = requests.get(search_)
            soup = bs(page.content, 'lxml')
            box = soup.find('div', {'id': 'wrap-noticias'})

            new_url = 'https://www.df.cl' + box.find('article').h2.a['href']
            tprint('[+] df.cl page found!', important=False)

            page = requests.get(new_url, cookies=cookies['df'])

        except Exception as exc:
            tprint('[-] df.cl page not found', important=False)

    d = process_inner(page)
    soup = bs(page.content, 'lxml')
    try:
        d['section'] = soup.find('meta',
                                 {'name': 'keywords'})['content'].strip()
    except Exception as exc:
        tprint('[-] Error parsing section (Df) - ', exc, important=False)
    try:
        d['body'] = '\n'.join([
            p for p in d['body'].split('\n')
            if len(p.split()) > 4 and p != d['description']
        ])
    except Exception as exc:
        tprint('[-] Error parsing body (Df) - ', exc, important=False)
    return d
Example #10
def process_link(link):

    link = get_direct_link(link)

    if link != '':
        try:
            d = process_outer(link)

        except requests.exceptions.ConnectionError:
            error = '[-] Connection Error ' + link
            tprint(error, important=False)
            d = {'error': 1, 'info': 'ConnectionError'}

        except Exception as exc:
            error = '[-] Error General ' + link + ' :' + str(exc)
            error = error[:275]
            tprint(error, important=False)
            d = {'error': 1, 'info': error}

    else:
        # Mark for deletion if tweet does not contain any links.
        error = '[-] Link Vacío en process_link'
        tprint(error, important=False)
        d = {'error': 1, 'borrar': 1, 'info': error}

    return d
Example #11
    def update(self):

        # TODO: fix pending (borrar, info, missing columns)
            
        try:
            self.create()
            self.insert_table()

            try:
                self.error.to_sql(self.error_table, con=self.con, if_exists='append', index=False)

            except Exception as exc:
                tprint('[-] Error updating error TempTable - ', exc)   
                
        except DatabaseError as db_error:
            error_msg = db_error._message()
            
            if 'Incorrect string value' in error_msg:
                self.destroy()
                
                bad_row = int(error_msg.split()[-1]) - 1
                poison = error_msg.split("value: '")[1].split("...' for")[0]
                
                tprint(f'[-] Encoding Error on row {bad_row} ({poison}). Retrying...')
                i = self.press.reset_index()['index'][bad_row]
                self.df['error'][i] = 1
                self.df['info'][i] = 'Encoding Error'
                self.divide_df()
                
                self.update()
                
            else:
                raise Exception('Unknown DatabaseError')
                
        except Exception as exc:
            error = f'[-] Error updating {self.result_table} table TempTable - '+str(exc)
            error = error[:275]
            tprint(error)
            try:
                save = self.df
                save['info'] = save['info'].fillna(error[:255])
                save[['original_link', 'borrar', 'info']].to_sql(self.error_table, con=self.con, if_exists='append', index=False)
            except Exception as exc:
                tprint('[-] Error trying to save extracted rows TempTable - ', exc)

        self.destroy()
Example #12
def process_Elmostrador(page):
    d = process_inner(page)
    soup = bs(page.content, 'lxml')
    d['description'] = None
    try:
        d['description'] = soup.find('figcaption').text
    except Exception as exc:
        tprint('[-] Error parsing description (Elmostrador) - ',
               exc,
               important=False)

    try:
        d['authors'] = soup.find('p', {
            'class': 'autor-y-fecha'
        }).find('a').text
    except Exception as exc:
        tprint('[-] Error parsing authors (Elmostrador) - ',
               exc,
               important=False)

    try:
        if 'www.elmostrador.cl' not in d['image'] and d['image']:
            d['image'] = 'https://www.elmostrador.cl' + d['image']
    except Exception as exc:
        tprint('[-] Error fixing image (Elmostrador) - ', exc, important=False)

    if not d['date']:
        try:
            date = [s for s in d['link'].split('/') if s.isdigit()][:3]
            d['date'] = datetime.datetime(*map(int, date))
        except Exception as exc:
            tprint('[-] Error parsing date (Elmostrador) - ',
                   exc,
                   important=False)

    try:
        d['section'] = ' '.join([
            x for x in soup.find_all('h2') if x.find('i') is not None
        ][0].text.split())
    except Exception as exc:
        tprint('[-] Error parsing section (Elmostrador) - ',
               exc,
               important=False)

    try:
        d['body'] = d['body'].split('__________________')[0]
    except Exception as exc:
        tprint('[-] Error fixing body (Elmostrador) - ', exc, important=False)

    return d
Example #13
def process_Cooperativa(page):
    d = process_inner(page)
    try:
        if 'al aire libre' in d['title'].lower():
            d = {'borrar': 1, 'info': 'Borrar, Al aire libre'}
    except:
        pass

    soup = bs(page.content, 'lxml')

    try:
        d['authors'] = soup.find('div', {
            'class': 'fecha-publicacion'
        }).find('span').text
    except Exception as exc:
        tprint('[-] Error parsing authors (Cooperativa) - ',
               exc,
               important=False)

    try:
        d['section'] = soup.find('a', {'id': 'linkactivo'}).text
    except Exception as exc:
        tprint('[-] Error parsing section (Cooperativa) - ',
               exc,
               important=False)

    try:
        d['tags'] = soup.find('meta', {'name': 'keywords'})['content'].strip()
    except Exception as exc:
        tprint('[-] Error parsing tags (Cooperativa) - ', exc, important=False)

    try:
        d['link'] = soup.find('meta', property='og:url')['content']
    except Exception as exc:
        tprint('[-] Error parsing link (Cooperativa) - ', exc, important=False)

    if not d['date']:
        try:
            date = [x for x in d['link'].split('/') if '-' in x][-1].split('-')
            d['date'] = datetime.datetime(*map(int, date))
        except Exception as exc:
            tprint('[-] Error parsing date (Cooperativa) - ',
                   exc,
                   important=False)

    try:
        if 'www.cooperativa.cl' not in d['image'] and d['image']:
            d['image'] = 'https://www.cooperativa.cl' + d['image']
    except Exception as exc:
        tprint('[-] Error fixing image (Cooperativa) - ', exc, important=False)

    return d
Example #14
def process_Latercera(page):

    d = {}

    if 'Lo sentimos, estamos actualizando el sitio' not in page.text:
        d = process_inner(page)

    else:
        ### Search Google for the article if needed.
        scraped_link = page.url.strip('/')
        tprint('[-] Link Latercera no encontrado', page.url, important=False)

        new_link = 'https://www.latercera.com/noticia/' + '-'.join([
            p for p in scraped_link.split('/')[-1].split('.')[0].split('-')
            if not p.isdigit()
        ])
        #print(new_link)
        page = requests.get(new_link)

        if 'Lo sentimos, estamos actualizando el sitio' not in page.text:
            d = process_inner(page)
            tprint('[+] Link Latercera encontrado (intento:1): ',
                   new_link,
                   important=False)

        else:
            try:
                tprint('[·] Google Searching...', important=False)
                buscar = ' '.join([
                    p for p in scraped_link.split('/')[-1].split('.')[0].split(
                        '-') if not p.isdigit()
                ]) + ' site:latercera.com'
                results = search(buscar, stop=5)
                rs = []
                for r in results:
                    rs.append(r)
                result = [r for r in rs if 'sitemap' not in r][0]

                if 'sitemap' not in result:
                    tprint('[+] Resultado en Google (intento:2):',
                           result,
                           important=False)
                    page = requests.get(result)
                    d = process_inner(page)
                else:

                    d['error'] = 1
                    d['info'] = 'Link Latercera no encontrado en google'

            except Exception as exc:
                tprint('[-] Link Latercera no encontrado', important=False)
                d['error'] = 1
                d['info'] = 'Link Latercera no encontrado en google'

    soup = bs(page.content, 'lxml')

    ### Recover the image.
    try:
        d['image'] = soup.find('figure').find('img')['src']
    except Exception as exc:
        tprint('[-] Error parsing image (Latercera) - ', exc, important=False)

    ### Recover the author.

    try:
        d['authors'] = [
            h.text for h in soup.find_all('h4') if 'Autor' in h.text
        ][0].replace('Autor: ', '')
    except Exception as exc:
        tprint('[-] Error parsing authors (Latercera) - ',
               exc,
               important=False)

    try:
        if d['description'] is None:
            d['description'] = soup.find('div', {'class': 'bajada-art'}).text
    except Exception as exc:
        tprint('[-] Error parsing description (Latercera) - ',
               exc,
               important=False)

    try:
        if d['date'] is None:
            date = ' '.join(
                soup.find('span', {
                    'class': 'time-ago'
                }).text.replace('|', '').split())
            d['date'] = datetime.datetime.strptime(date, '%d/%m/%Y %I:%M %p')
    except Exception as exc:
        tprint('[-] Error parsing date (Latercera) - ', exc, important=False)

    try:
        d['section'] = soup.find('meta', property='article:section')['content']
    except:
        try:
            d['section'] = [
                x.find('a').text for x in soup.find_all('h4')
                if x.find('a') is not None and 'canal' in x.find('a')['href']
            ][0]
        except Exception as exc:
            tprint('[-] Error parsing section (Latercera) - ',
                   exc,
                   important=False)

    d['tags'] = ', '.join(
        [x['content'] for x in soup.find_all('meta', property='article:tag')])
    if not d['tags']:
        try:
            d['tags'] = ', '.join([
                x.text for x in soup.find('div', {
                    'class': 'tags-interior'
                }).find_all('a')
            ])
        except Exception as exc:
            tprint('[-] Error parsing tags (Latercera) - ',
                   exc,
                   important=False)

    return d
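The fallback URL for La Tercera is rebuilt by taking the slug of the dead link and stripping out its numeric ID fragments. That transformation in isolation (the URL here is invented for the illustration):

scraped_link = 'https://www.latercera.com/noticia/algun-titulo-123456.shtml/'
slug = '-'.join(p
                for p in scraped_link.strip('/').split('/')[-1].split('.')[0].split('-')
                if not p.isdigit())
new_link = 'https://www.latercera.com/noticia/' + slug
print(new_link)  # https://www.latercera.com/noticia/algun-titulo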
Example #15
def delete_error_where(engine=None, con=None, where=''):
    
    processed_table = config['TABLES']['PROCESSED']
    error_table = config['TABLES']['ERROR']
    
    close = False
    close_ = False

    if engine is None:
        engine = mysql_engine()
        con = engine.connect()
        close = True

    if con is None:
        con = engine.connect()
        close_ = True

    # Where clause
    if where == '':
        where = input('Where clause for mysql query:\n\t- ')
        print()
        
    # Count and confirm
    tprint('[·] Counting links...')
    count = engine.execute(f'select count(*) from {error_table} where {where}').scalar()
    y = input(f'\nAre you sure you want to remove {count} links? (y/n): ')
    print()
    
    if y == 'y':
    
        # Get links to be removed
        tprint('[·] Getting Links...')
        to_be_removed = mysql_query_as_set(f'select original_link from {error_table} where {where};', con=con)

        # Filtering Processed
        tprint('[·] Filtering processed table...')
        processed = mysql_query_as_set(f'select original_link from {processed_table};', con=con)
        processed = processed - to_be_removed

        # Reinserting into processed
        tprint('[·] Reinserting into processed table...')
        temp = f'{processed_table}_backup_'+datetime.now().strftime('%d_%m')
        engine.execute(f'create table {temp} like {processed_table}')
        insert_set(processed, temp, 'original_link', engine=engine, con=con)

        engine.execute(f'drop table {processed_table}')
        engine.execute(f'rename table {temp} to {processed_table}')
        
        # Delete from error
        tprint('[·] Deleting from processed and error table...')
        engine.execute(f'delete from {error_table} where {where}')
        
        # Done.
        count_error = engine.execute(f'select count(*) from {error_table}').scalar()
        tprint(f'[+] Done! {count_error} links left in {error_table} table')
        

    if close:
        con.close()
        engine.dispose()
    if close_:
        con.close()
Example #16
def work(result_table=None, 
         df=None,
         debug=True,
         n_pools=15, 
         n=150, 
         queue_table=None,
         processed_table=None,
         error_table=None,
         delete=False,
         engine=None, 
         con=None, 
         rand=False):
    
    if df is None:
        df = pd.DataFrame()
        
    if result_table is None:
        result_table = config['TABLES']['RESULT']
    if queue_table is None:
        queue_table = config['TABLES']['QUEUE']
    if processed_table is None:
        processed_table = config['TABLES']['PROCESSED']
    if error_table is None:
        error_table = config['TABLES']['ERROR']

    s = time.time()
    
    tprint('[·] Downloading and processing data from table...')
    
    tt = TempTable(result_table=result_table,
                   df=df,
                   debug=debug,
                   n_pools=n_pools,
                   n=n,
                   queue_table=queue_table,
                   processed_table=processed_table,
                   error_table=error_table,
                   delete=delete,
                   engine=engine,
                   con=con,
                   rand=rand)
               
    if not tt.df.empty:
        
        t1 = time.time()
        tprint(f'[+] Done ({round(t1-s,2)} seconds)')
        tprint('[·] Inserting into main table...')

        tt.update()

        f = time.time()
        
        tprint(f'[+] Done ({round(f-t1,2)} seconds)')
        tprint(f'[+] {len(tt.press)}/{len(tt.df)} news scraped in {round(f - s,2)} seconds. ({round((f - s)/n, 2)} s/article)')
        status = 'working'
               
    else:
        # Terminate job when there are no links left.

        tprint('[+] DONE, updated every article.')
        status = 'done'
        
    tt.close_mysql()
    
    return tt.df, status
Example #17
def program(result_table=None, 
            df=None,
            debug=True, 
            n_pools=15, 
            n=150, 
            queue_table=None,
            processed_table=None,
            error_table=None,
            delete=False, 
            engine=None, 
            con=None, 
            rand=False):
    
    if df is None:
        df = pd.DataFrame()
        
    if result_table is None:
        result_table = config['TABLES']['RESULT']
    if queue_table is None:
        queue_table = config['TABLES']['QUEUE']
    if processed_table is None:
        processed_table = config['TABLES']['PROCESSED']
    if error_table is None:
        error_table = config['TABLES']['ERROR']
    
    
    # Initializing...
    if engine is None:
        engine = mysql_engine()
        con = engine.connect()
    if con is None:
        con = engine.connect()

    print()
    tprint('[·] Initializing...')
    status = 'working'
    init_mysql_db(engine=engine)
    recover_discarded(con=con)
    queue = len_tables('queue')['queue']
    tprint('[+] Done.')
    print()
    
    con.close()
    engine.dispose()

    
    
    i = 1
    while queue != 0:
        
        engine = mysql_engine()
        con = engine.connect()
        
        try:
            result, status = work(result_table=result_table, 
                                  df=df,
                                  debug=debug,
                                  n_pools=n_pools, 
                                  n=n,
                                  queue_table=queue_table,
                                  processed_table=processed_table,
                                  error_table=error_table,
                                  delete=delete,
                                  engine=engine, 
                                  con=con, 
                                  rand=rand)
            
            if status == 'done':
                tprint('[+] DONE!')
                input('\n(ENTER)')
                sys.exit()

        except KeyboardInterrupt:
            sys.exit()
            
        except Exception as exc:
            tprint('[-] Error General - ', exc)
        print()
                
        con.close()
        engine.dispose()
        
        if i%100 == 0:
            
            queue = len_tables('queue')['queue']
            tprint('[+] {} left in queue.'.format(queue))
            print()
            
        i += 1
        
    tprint('[+] DONE!')
Example #18
def scrape_tweets(user, days=0, months=0, years=0, monthly=False, 
                  yearly=False, since='', until=''):
    
    path = f"{os.environ['HOME']}/presscontrol/twitter_tempfiles"
    if not os.path.exists(path):
        os.makedirs(path)

    tweets = {}
    counter = 0
    
    if until == '':
        until = datetime.today()+timedelta(1)
    else:
        until = datetime.strptime(until, '%Y-%m-%d')
    
    
    if since == '':
        d = int(days + months*31 + years*365)
        since = until - timedelta(d)
    else:
        since = datetime.strptime(since, '%Y-%m-%d')

    since_ = since
    
    while since_ < until:
        
        
        if since_.year not in tweets:
            tweets[since_.year] = {}

        until_ = next_date(since_) if next_date(since_) < until else until
        
        if since_.day == until_.day:
            pr = f'{calendar.month_name[since_.month]} {since_.year}'
        else:
            pr = f'{dt2str(since_)} to {dt2str(until_)}'
            
        filename = f"{path}/{user} {pr}.pkl"
        tprint(f'[·] Getting tweets from {pr}')
        
        if os.path.exists(filename):
            tprint('[+] Found in twitter tempfiles')
            tweets_ = pd.read_pickle(filename)
            
        else:
            try:
                tweetCriteria = got.manager.TweetCriteria().setUsername(user).setSince(dt2str(since_)).setUntil(dt2str(until_))
                tweets_ = got.manager.TweetManager.getTweets(tweetCriteria)
                
                if len(tweets_) > 0:
                    with open(f"{path}/{user} {pr}.pkl", 'wb') as f:
                        pickle.dump(tweets_, f)

            except Exception as exc:
                tweets_ = []
                print('\nError\n', exc)

        tprint(f'[+] Done ({len(tweets_)} tweets).')
        counter += len(tweets_)
        tweets[since_.year][since_.month] = tweets_
        since_ = until_
        
    print()
    tprint(f'[+] DONE ({counter} tweets)')
        
    return tweets
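scrape_tweets windows the download so that each month can be cached to its own pickle under twitter_tempfiles. next_date is not part of this listing; the sketch below assumes it advances a datetime to the first day of the following month, which is consistent with how the window labels and filenames are built:

from datetime import datetime


def next_date(dt):
    # Assumed behaviour of the project's helper: jump to the first day
    # of the following month.
    return datetime(dt.year + (dt.month == 12), dt.month % 12 + 1, 1)


since_, until = datetime(2019, 11, 20), datetime(2020, 2, 10)
while since_ < until:
    until_ = next_date(since_) if next_date(since_) < until else until
    print(since_.date(), '->', until_.date())
    since_ = until_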