Example #1
0
def main():
    """Crawl blogs, parse their comments, and store the entries in MongoDB.

    Runs three phases, printing a progress message and the elapsed
    wall-clock seconds for each:

    1. Schedules 100 ``Parser.Salon24Spider`` crawls on the module-level
       ``process`` (``amount=13`` per crawl), each one writing into its own
       ``Result`` object, then calls ``process.start()``.
    2. Merges the ``data`` lists of the results into five buckets and runs
       ``DM.parseComments`` on each bucket in a separate thread.
    3. Connects to MongoDB at ``localhost:27017``, inserts every entry via
       ``DbManager.insert_entry``, and closes the client afterwards.
    """
    crawl_count = 100   # number of spider runs / Result objects
    batch_size = 13     # value passed as ``amount`` to every spider
    worker_count = 5    # number of comment-parsing threads / buckets

    print("Downloading all blogs without comments...")
    start = time.time()

    # One Result per crawl so the spiders never share an output object.
    results = [Result() for _ in range(crawl_count)]

    # Offsets are 1, 14, 27, ... (crawl_count values in total).
    # NOTE(review): ``input`` is ``1 + offset``, so the first crawl starts at
    # 2 — confirm that skipping 1 is intended.
    offsets = range(1, crawl_count * batch_size, batch_size)
    for result, offset in zip(results, offsets):
        process.crawl(Parser.Salon24Spider(),
                      input=1 + offset,
                      amount=batch_size,
                      result=result)
    # Presumably a Scrapy CrawlerProcess whose start() blocks until every
    # scheduled crawl has finished — confirm against the module setup.
    process.start()

    print("Took: ", time.time() - start, "sec")

    print("Downloading all comments...")
    start = time.time()

    # Deal the results round-robin into one bucket per worker thread:
    # bucket k receives results k, k + worker_count, k + 2 * worker_count, ...
    data = [[] for _ in range(worker_count)]
    for index, result in enumerate(results):
        data[index % worker_count].extend(result.data)

    threads = []
    for bucket in data:
        worker = threading.Thread(target=DM.parseComments, args=(bucket, ))
        threads.append(worker)
        worker.start()

    # Wait for every worker before touching the buckets again.
    for worker in threads:
        worker.join()

    print("Took: ", time.time() - start, "sec")

    print("Inserting to Database...")
    start = time.time()

    print("Connecting to Database...")

    client = MongoClient('localhost:27017')
    try:
        db = client.Salon24
        dbManager = DbManager(db)

        print("Connected successfully.")

        for bucket in data:
            for blog in bucket:
                dbManager.insert_entry(blog)

        print("Took: ", time.time() - start, "sec")
    finally:
        # Release the connection pool even if an insert fails.
        client.close()