Beispiel #1
0
import calendar
import datetime

from Database.dbMongo import Database
from Scraper.liputan_scraper import Scraper_Liputan

# Scraper and database handles used by the rest of this script.
SL = Scraper_Liputan()
db = Database()
now = datetime.date.today()

# Commented-out Tempo category lists (not referenced by the active code here):
# list_category_tempo = ['nasional', 'pemilu', 'pilpres', 'dunia', 'bisnis', 'bola', 'sport', 'seleb', 'tekno', 'otomotif']
# list_name_category_tempo = ['news', 'news', 'news', 'news', 'bisnis', 'sports', 'sports', 'entertainment', 'tekno', 'otomotif']

# Run parameters: storage location, source label and category names.
status = 'harian'
database, collection = 'scraper', 'tekno'
source = 'liputan6.com'
category = name_category = 'tekno'

# Date components of the run.
day, month, year = 31, 6, 2018

# Visit every real calendar day of the configured month.  The loop used to be
# hard-coded to 31 iterations, which for a 30-day month such as month = 6
# also requested a day that does not exist.
# `i` is zero-based; the day of month passed to the helpers is `i+1`.
for i in range(calendar.monthrange(year, month)[1]):

    # Delete this day's data from MongoDB before scraping it again.
    db.delete_by_request(database, collection, source, i+1, month, year)

    # Fetch the data for this day.
    data = SL.get_dataHarian(category, name_category, year, month, i+1)

    attr = []
    for d in data:
Beispiel #2
0
from pymongo.errors import ServerSelectionTimeoutError
from Scraper.liputanScraper import liputanScraper
from Database.dbMongo import Database
import datetime
import pymongo
import yaml
import os

## Load the YAML config file (the relative path is resolved against the
## current working directory by os.path.abspath).
filename_config = os.path.abspath("Config/config.yml")
# Read through a context manager so the file handle is always closed.
# yaml.safe_load replaces yaml.load(stream): calling yaml.load without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError in
# PyYAML 6.  NOTE(review): safe_load only builds plain YAML types; confirm
# the config file does not rely on Python-specific tags.
with open(filename_config, "r") as config_file:
    config = yaml.safe_load(config_file)

## Instantiate the liputanScraper class and the Database class
scraperLiputan = liputanScraper()
DB = Database()

## Set the current date
now = datetime.date.today()


## Liputan6 daily class
class Liputan():
    ## Initialise the instance defaults from the module-level config
    def __init__(self):
        # MongoDB settings read from config['database']['mongo']
        self.host = config['database']['mongo']['host']
        self.database = config['database']['mongo']['database']
        self.collection = config['database']['mongo']['collection']
        self.port = config['database']['mongo']['port']
        # Source label for this class
        self.iSource = 'liputan6.com'
        # Keep a reference to the whole loaded config mapping
        self.config = config
        # Day of month taken from the module-level `now`, which is computed
        # once when the module is loaded (not when the instance is created)
        self.day = now.day
Beispiel #3
0
import datetime
from Database.dbMongo import Database
from Scraper.tempo_scraper import Scraper_Tempo
from Scraper.kompas_scraper import Scraper_Kompas

# One scraper instance per news source, plus the database handle.
ST, SK = Scraper_Tempo(), Scraper_Kompas()
db = Database()
now = datetime.date.today()

# Tempo category list and its parallel name list.
list_category_tempo = [
    'otomotif',
]
list_name_category_tempo = [
    'otomotif',
]

#delete data from mongoDB
# db.delete_dataMonthly('scraper', 'test2', 'tempo.co', '10', '2018')

# Get Data
# for x,y in zip(list_category_tempo, list_name_category_tempo):
#     data = ST.get_dataBulanan(x, y, now.year, now.month)
#
#     attr = []
#     for i in range(len(data)):
#         attr.append(data[i])
#
#     db.insert_data('scraper', 'test2', attr)

# data = ST.get_ner('bulanan')
# attr = []
# for d in data:
#     attr.append(d)
# db.delete_dataMonthly('scraper', 'test2', 'tempo.co', '10', '2018')
Beispiel #4
0
import datetime
from Database.dbMongo import Database
from Scraper.kompas_scraper import Scraper_Kompas

# Kompas scraper and database handles.
SK = Scraper_Kompas()
db = Database()
now = datetime.date.today()

# Storage location and source label.
database, collection, source = 'scraper', 'test', 'kompas.com'

# Parallel lists: the i-th entry of each is passed together to the scraper.
list_category_kompas = [
    'news',
    'ekonomi',
    'bola',
    'entertainment',
    'tekno',
    'otomotif',
]
list_name_category_kompas = [
    'news',
    'bisnis',
    'sports',
    'entertainment',
    'tekno',
    'otomotif',
]

# Delete data from MongoDB for this database/collection/source before the
# new scrape results are inserted.
db.delete_dataDaily(database, collection, source)

# Get the data: x is the category passed to the scraper and y its paired
# name from the parallel list; the date is today's year, month and day.
for x, y in zip(list_category_kompas, list_name_category_kompas):
    data = SK.get_dataHarian(x, y, now.year, now.month, now.day)

    # Copy the result into a plain list in one step rather than with an
    # index-driven append loop.
    attr = list(data)

    db.insert_data(database, collection, attr)
Beispiel #5
0
import datetime
from Database.dbMongo import Database
from Scraper.liputan_scraper import Scraper_Liputan
from Scraper.tempo_scraper import Scraper_Tempo
from Scraper.kompas_scraper import Scraper_Kompas

# One scraper instance per news source.
SL, ST, SK = Scraper_Liputan(), Scraper_Tempo(), Scraper_Kompas()

# Database handle and the current date.
db = Database()
now = datetime.date.today()

##### TEMPO #########
# #delete data from mongoDB
# db.delete_dataDaily('scraper', 'test', 'tempo.co')
#
# list_category_liputan = ['tekno']
# list_name_category_liputan = ['tekno']
#
# # Get Data
# for x,y in zip(list_category_liputan, list_name_category_liputan):
#     data = ST.get_dataHarian(x, y, now.year, now.month, now.day)
#
#     attr = []
#     for i in range(len(data)):
#         attr.append(data[i])
#
#     db.insert_data('scraper', 'test', attr)
#
# data = ST.get_ner()