def getDataFromLastYears(symbol, historic=False):

    # get the year from which we have to start scraping.
    year = int(props.get("startYear"))

    # check for the company's table in the db, or create it if it doesn't exist yet.
    isCreated = dbop.createTable(symbol, historic)

    # This loop builds two-month date windows and scrapes data from startYear (e.g. 2000) through December of last year (e.g. 2017).
    while year < currentYear:
        startMonth = 1
        endMonth = startMonth + 1
        while endMonth < 13:
            if not historic:
                result = formDateAndCallScrapper(startMonth, endMonth, year)
            else:
                result = formDateAndCallScrapper(startMonth,
                                                 endMonth,
                                                 year,
                                                 historic=True)
            startMonth = endMonth + 1
            endMonth = startMonth + 1
        year += 1

    startDay = 1
    startMonth = 1
    endMonth = startMonth + 1
    limitMonth = int(datetime.datetime.now().strftime("%m"))  # Current month

    # Scrape the current year in two-month windows up to the current month; the leftover slot is handled below.
    while endMonth < limitMonth:
        if not historic:
            result = formDateAndCallScrapper(startMonth, endMonth, year)
        else:
            result = formDateAndCallScrapper(startMonth,
                                             endMonth,
                                             year,
                                             historic=True)
        startMonth = endMonth + 1
        endMonth = startMonth + 1

    if limitMonth - startMonth == 0 or limitMonth - startMonth == 1:
        # Zero-pad day and month to match the dd-mm-YYYY format used for endDate
        startDate = "%02d-%02d-%d" % (startDay, startMonth, year)
        endDate = str(datetime.datetime.now().strftime("%d-%m-%Y"))
        print("start - ", startDate, " to end - ", endDate)
        msg = "start - " + startDate + " to end - " + endDate
        Log(msg)
        if not historic:
            sc = Scrapper()
            result = sc.equityScrapper(symbol,
                                       startDate,
                                       endDate,
                                       selected=True,
                                       timeout=100)
        else:
            sc = Scrapper(historic=True)
            result = sc.historicScrapper(startDate, endDate)
Example #2
    def initialise_stats(self):
        if self.has_scrapper_links and self.has_valid_predictions:
            self.predictions = Prediction().initialise_prediction().get_all_prediction()
            self.driver_standings = Scrapper().initialise_links().scrape_driver()
            self.team_standings = Scrapper().initialise_links().scrape_constructor()
            return self
        else:
            print("Links and predictions not initialised properly")
            return self
def formDateAndCallScrapper(startMonth, endMonth, year, historic=False):
    dates = dt.dateCreator(startMonth, endMonth, year)
    print "start - ", dates[0], " to end - ", dates[1]
    msg = "start - " + dates[0] + " to end - " + dates[1]
    Log(msg)
    if not historic:
        sc = Scrapper()
        return sc.equityScrapper(symbol,
                                 dates[0],
                                 dates[1],
                                 selected=True,
                                 timeout=100)
    else:
        sc = Scrapper(historic=True)
        return sc.historicScrapper(dates[0], dates[1])
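The two functions above assume a module-level `symbol` plus a `dt.dateCreator(startMonth, endMonth, year)` helper that is not shown in this snippet. Judging from how `dates[0]` and `dates[1]` are consumed, it plausibly returns a pair of dd-mm-YYYY strings spanning the two-month window; a minimal sketch under that assumption (not the project's actual implementation):

import calendar

def dateCreator(startMonth, endMonth, year):
    # Hypothetical reconstruction: first day of startMonth to last day of endMonth,
    # both formatted as dd-mm-YYYY to match the strftime("%d-%m-%Y") used above.
    startDate = "01-%02d-%d" % (startMonth, year)
    lastDay = calendar.monthrange(year, endMonth)[1]
    endDate = "%02d-%02d-%d" % (lastDay, endMonth, year)
    return startDate, endDate

# e.g. dateCreator(1, 2, 2017) -> ("01-01-2017", "28-02-2017")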
Example #4
def aule():
    aula = request.args.get('aula')
    settimanaDopo = request.args.get('settimanaDopo')

    # Convert the query-string value to a boolean
    if settimanaDopo == 'True':
        settimanaDopo = True
    else:
        settimanaDopo = False
    scrapper = Scrapper()
    dati = scrapper.cerca_orario_aule(aula, settimanaDopo)
    if dati is None:
        return "SETTIMANA DI VACANZA"  # Italian for "vacation week"
    ris = "Aula " + aula + "<br>"
    for giorni in dati:
        for giorno in giorni.values():
            if isinstance(giorno, str):
                ris += giorno + " "
            else:
                for materie in giorno:
                    for materia in materie.values():
                        if isinstance(materia, str):
                            ris += materia + " "
                        else:
                            for classe in materia:
                                ris += classe + " "
                    ris += "<br>"
            ris += "<br>"
    return ris
Example #5
def create_app():
    app = Flask(__name__)
    CORS(app)

    from blueprints import npcs_blueprint
    from blueprints import gears_blueprint
    from blueprints import runes_blueprint
    from blueprints import biomes_blueprint
    from blueprints import bosses_blueprint
    from blueprints import outfits_blueprint
    from blueprints import pickups_blueprint
    from blueprints import enemies_blueprint
    from blueprints import mutations_blueprint
    from blueprints import achievements_blueprint

    app.register_blueprint(npcs_blueprint.bp)
    app.register_blueprint(gears_blueprint.bp)
    app.register_blueprint(runes_blueprint.bp)
    app.register_blueprint(biomes_blueprint.bp)
    app.register_blueprint(bosses_blueprint.bp)
    app.register_blueprint(outfits_blueprint.bp)
    app.register_blueprint(pickups_blueprint.bp)
    app.register_blueprint(enemies_blueprint.bp)
    app.register_blueprint(mutations_blueprint.bp)
    app.register_blueprint(achievements_blueprint.bp)

    app.scrapper_manager = Scrapper()

    @app.errorhandler(404)
    def route_not_found(error):
        app.logger.error(error)
        return 'Route not found.', 404

    return app
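A factory like create_app() is typically launched from a small entry-point module; a minimal sketch, assuming the factory above lives in a module named app.py (the module name and port are assumptions, not taken from the source):

# run.py - hypothetical entry point
from app import create_app  # assumed module name

app = create_app()

if __name__ == '__main__':
    # Flask's built-in development server; use a proper WSGI server in production
    app.run(host='0.0.0.0', port=5000)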
Example #6
def count(id):
    logger.info(f'Adding task for id: {id}')
    session = Session()
    task = session.query(Tasks).filter_by(id=id).first()
    res = Results(address=task.address, words_count=0, http_status_code=0)

    try:
        scrpr = Scrapper(task.address)
    except Exception:
        scrpr = None

    if scrpr:
        err = scrpr.get_page()
        if not err:
            task.http_status_code, matches = scrpr.count_matches()
            task.task_status = 'FINISHED'
            res = Results(address=task.address,
                          words_count=matches,
                          http_status_code=task.http_status_code)
        else:
            print(err)

    session.add(res)
    session.commit()
    logger.info(task)
    logger.info(res)
def extract_acts():
    scrapper = Scrapper(constants.base_url)
    # When the URL is requested without data, the search form is retrieved
    home_page = scrapper.request({})
    acts_scrapper = ActsParser(home_page)
    acts_scrapper.parse()
    scrapper.save_data(acts_scrapper.acts, "acts.json")
Example #8
def scrap_and_upload(vehicle_category):
    """
    """
    if vehicle_category is None:
        sys.exit("vehicle category cannot be null")
    vehicles = load_scrapping_links(vehicle_category)

    start_time = datetime.utcnow().strftime("%Y-%m-%d")
    create_directory(f"tmp")
    create_directory(f"tmp/{vehicle_category}")
    file_path = f"{DIR_NAME}/tmp/{vehicle_category}/{start_time}.csv"

    if os.path.exists(file_path):
        header = None
    else:
        header = ["Make", "Model", "Trim", "Year", "Mileage", "Price"]

    for make, model, urls in vehicles:
        for website_name, link in urls.items():
            if website_name == 'cg':
                urlsuffix = "#resultsPage="
            elif website_name == 'ed':
                urlsuffix = "?pagenumber="
            else:
                # Unknown source key: skip it so urlsuffix is never left undefined
                continue
            site_scrapper = Scrapper(website_name, link, urlsuffix, make,
                                     model, vehicle_category)
            site_scrapper.fetch_batch(NUM_OF_PAGES)
            if site_scrapper.listings:
                with open(file_path, "a") as csvfile:
                    write(csvfile, site_scrapper.listings, header)
                    header = None

    if os.path.exists(file_path):
        s3_client = boto3.client('s3')
        s3_client.upload_file(file_path, DESTINATION_BUCKET,
                              f"{vehicle_category}/{start_time}.csv")
Example #9
def check_prices():
    users = session.query(User).all()
    scrapper = Scrapper()
    items = session.query(Item).all()
    for item in items:
        scrapper.go_to(item.link)
        price = scrapper.get_price()
        title = scrapper.get_title()
        if not item.title:
            item.title = title
            session.commit()
        if item.price:
            change_percentage = (abs(price - item.price) / item.price) * 100.0
            if change_percentage >= 3:
                item.price = price
                session.commit()
                markup = InlineKeyboardMarkup(
                    [InlineKeyboardButton('Check', url=item.link)])
                for u in users:
                    try:
                        bot.send_message(
                            u.tg_id,
                            '<code>{}</code> price changed'.format(title),
                            parse_mode=ParseMode.HTML,
                            reply_markup=markup)
                    except Exception as e:
                        config.logger.error(
                            'Error sending a message: {}'.format(e))
        else:
            item.price = price
            session.commit()
Example #10
def getDataFromLast7Dayz(symbol):
    isCreated = dbop.createTable(symbol)
    print "getting data from last 7 days for ", symbol
    msg = "getting data from last 7 days for " + symbol
    Log(msg)
    sc = Scrapper()
    result = sc.equityScrapper(symbol, selected=False, timeout=100)
Example #11
def scrapeURL():
    data = request.json
    url = data['url']
    response = dict()
    scrapper = None

    if urlExists(url, timeout=20, check_is_image=False):
        if isInCustomSites(url):
            scrapper = CustomScrapper()
            response['custom'] = True
        else:
            scrapper = Scrapper()
            response['custom'] = False

        image_or_data_urls = scrapper.scrape(url)
        if len(image_or_data_urls) > 0:
            response['success'] = True
            response['output'] = image_or_data_urls
            response['stats'] = scrapper.stats
        else:
            response['success'] = False
            response['output'] = "NO_IMAGES_FOUND"
    else:
        response['success'] = False
        response['output'] = "INVALID_URL"

    return response
Example #12
def olx_bot():
    scrapper = Scrapper()

    if Scrapper.isExecution:
        return render_template('running.html')
    else:
        scrapper.start()
        return render_template('sucess.html')
Example #13
def olx_bot():
    scrapper = Scrapper()

    if Scrapper.isExecution:
        print('The program is already running')
    else:
        scrapper.start()
        print('The program is starting')
    def run(self):
        scrapper = Scrapper()
        linklist = scrapper.loadLink(self.rooturl)

        dbr = DB()
        dbr.rawlinks_save(linklist)
    def run(self):
        scrapper = Scrapper()
        global folder_path
        global test_df
        test_df = generate_test_data(self.username, self.threshold)
        folder_path = scrapper.dowload_data(self.username, self.threshold)
        # user_account = "skyemcalpine"
        folder_path = folder_path.replace("\\", "/")
        print(folder_path)
        self.signals.result.emit(True)
Example #16
    def check_and_scrap_reviews(self, hotel_name, platforms):
        for platform in platforms:
            if platform == 'TA':
                data = self.read_csv_to_list(
                    "C:/Users/acfelk/Documents/IIT_Files/final year/FYP/fyp_workfiles/final_project/backend/drops/"
                    + hotel_name + "-tripadvisor.csv")

                if data is None:
                    # No cached CSV yet: call the scrapper to scrape the reviews into drops/
                    scrapper = Scrapper()
                    scrapper.scrap_reviews(hotel_name, platform)

            if platform == 'BC':
                data = self.read_csv_to_list(
                    "C:/Users/acfelk/Documents/IIT_Files/final year/FYP/fyp_workfiles/final_project/backend/drops/"
                    + hotel_name + "-bookingscom.csv")

                if data is None:
                    # No cached CSV yet: call the scrapper to scrape the reviews into drops/
                    scrapper = Scrapper()
                    scrapper.scrap_reviews(hotel_name, platform)
Example #17
def main():
    """
    Instancie mes classes Requester et Scrapper, effectue une première requete puis transmet la réponse au scrapper
    """

    requester = Requester()
    scrapper = Scrapper(requester)

    requested_response = requester.html_requester(constants.URL)
    category_list = scrapper.get_category_list(requested_response)

    scrapper.scrap_books_in_category(category_list, scrapper)
Example #18
    def get_table_info(self):
        details_movie = None
        try:
            if self.download_url is not None:
                self.sc = Scrapper(self.download_url)
                details_movie = self.sc.get_movie_details()

        except Exception as e:
            print("Error initializing the Scrapper: " + str(e))

        if details_movie is not None:
            return details_movie
Example #19
def book_download(query, book_name):
    data = Scrapper(query).parse_data()
    # Find the entry whose title matches book_name, then fetch its direct download link
    try:
        book = list(
            filter(lambda book: book['Book']['title'] == book_name, data))[0]
        direct_dl = DownloadFetcher(book).get_direct_download()
        return jsonify({'book': book, 'download': direct_dl}), 200

    except Exception as e:
        print(e)
        print(book_name)
        return f"Error specified book name not found for query = {query}", 404
Example #20
def create_recipe():
    json_data = request.get_json()
    url = json_data.get('url')
    type_recipe = json_data.get('typeRecipe')
    print(f'Creating entry \'{type_recipe}\' for url: \'{url}\'')

    if type_recipe is None:
        raise ValueError("typeRecipe is empty")
    if url is None:
        raise ValueError("URL is empty")

    recipe = mongo.add_recipe(
        Scrapper(url=url, type_recipe=type_recipe).scrap())
    return {'success': True, 'recipe': recipe}
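create_recipe() reads a JSON body with 'url' and 'typeRecipe' keys and returns the stored recipe. A hedged client-side sketch; the route path and host below are assumptions, since the route decorator is not shown in this snippet:

import requests

# Hypothetical endpoint; the real route path is not visible in the snippet above.
resp = requests.post(
    "http://localhost:5000/recipes",
    json={"url": "https://example.com/some-recipe", "typeRecipe": "dessert"},
)
print(resp.json())  # expected shape: {"success": True, "recipe": {...}}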
Example #21
    def __init__(self):
        ap = argparse.ArgumentParser()
        ap.add_argument("-train",
                        "--train",
                        required=True,
                        help="whether to train a model or not")
        self.args = vars(ap.parse_args())

        self.scrapper = Scrapper()
        # self.dataManager = DataManager()
        self.filterImage = FilterImage()
        self.faceRecognition = FaceRecognition()
        self.emotionDetection = EmotionDetection()
        self.model = Model()
Example #22
async def main():
    with open('settings.yml') as file:
        settings = load(file)
    scrapper = Scrapper()
    war = scrapper.scape('https://en.wikipedia.org/wiki/War')
    charity = scrapper.scape(
        'https://en.wikipedia.org/wiki/Charity_(practice)')
    beer = scrapper.scape('https://en.wikipedia.org/wiki/Beer')
    death = scrapper.scape('https://en.wikipedia.org/wiki/Death')
    witai_settings = settings.get('witai', {})
    witai = Witai(witai_settings.get('entity'), witai_settings.get('token'))
    await witai.put_words((await war)[0], Value.NEGATIVE)
    await witai.put_words((await charity)[0], Value.POSITIVE)
    await witai.put_words((await beer)[0], Value.POSITIVE)
    await witai.put_words((await death)[0], Value.NEGATIVE)
    def test(self, username, threshold):
        scrapper = Scrapper()
        folder_path = scrapper.dowload_data(username, threshold)
        dataProcessor = DataProcessor(folder_path)
        data = dataProcessor.create_dataframe_input()
        # print(data)
        class_names = ['food and drink', 'entertainment', 'business and industry',
                       'family and relationships', 'fitness and wellness',
                       'hobbies and activities', 'shopping and  fashion',
                       'sports and outdoors', 'technology']
        model_path = "./last_cnn_model.h5"
        cnnModel = CnnModel(class_names, model_path, data)
        model = cnnModel.load_model()
        test_generator = cnnModel.create_generator()
        prediction = cnnModel.getPrediction(model, test_generator)
        result = np.sum(prediction, axis=0)
        result *= 1 / len(prediction)
        return result
Example #24
def start_scraper():
    global SELENIUM
    global FILE_NAME
    global TEST
    kwargs = {
        'selenium': SELENIUM,
        'url': FILE_NAME,
        'test': TEST,
        'skip_after': 0,
        'skip_before': 0,
        'export': 'json'
    }
    if not TEST:
        print('test not enabled...')
        Scrapper(**kwargs).crawl()
    else:
        Scrapper.test()
Example #25
def main(args):
    username = args.username
    cid = os.environ['SPOTIPY_CLIENT_ID']
    secret = os.environ['SPOTIPY_CLIENT_SECRET']
    redirect_uri = os.environ['SPOTIPY_REDIRECT_URI']
    content = args.content

    spoti = TrackExtractor(username, cid, secret, redirect_uri)
    sc = Scrapper()
    if content == 'all':
        ret_tracks, _, _ = spoti.all_tracks()
    elif content == 'playlists':
        ret_tracks, _, _ = spoti.tracks_in_all_playlists()
    elif content == 'saved_tracks':
        ret_tracks, _, _ = spoti.saved_tracks()
    else:
        print(
            'Wrong set of filter! Please enter one of [\'all\', \'playlists\',\'saved_tracks\']'
        )
def main():
    scrapper = Scrapper()
    merger = Merger()
    parser = Parser()
    client = MongoClient('localhost', 27017)
    db = client['Data']
    collection_socialmedia = db['socialmedia']

    # Begin real-time collection
    while True:
        scrapper.scrap()
        merger.main()
        parser.main()
        sleep(3600)

        # Store the scraped JSON in MongoDB
        with open('/home/sartharion/Bureau/stage/POO/data.json', 'r') as f:
            file_data = json.load(f)
        collection_socialmedia.delete_many({})
        collection_socialmedia.insert_many(file_data)

    client.close()  # unreachable while the loop above runs forever
Example #27
def run_bot():
    """
    Load config, connect to database, initialize and launch bot
    """
    loop = asyncio.get_event_loop()
    logger = logging.getLogger(__name__)

    with open('../config/config.yml', 'r') as config_file:
        config = yaml.load(config_file, Loader=yaml.BaseLoader)

    try:
        db = Db(config)
        loop.run_until_complete(db.connect_db())
    except asyncpg.PostgresError as err:
        logger.error('Cannot connect to database: %s', err)
        return

    bot = Bot(command_prefix=config['bot']['prefix'])
    bot.db = db
    bot.add_cog(Scrapper(bot))
    bot.add_cog(QuiADit(bot))
    bot.run(config['bot']['token'])
Example #28
def Scrap():
    product = request.args.get('product')
    maxpages = request.args.get('max')
    website = request.args.get('website')

    if not maxpages:
        maxpages = 2
    print(product, maxpages)

    scrap = Scrapper()
    scrapped_data, csvfile = scrap.start(product,
                                         max=maxpages,
                                         website=website)

    record = Record(product=product,
                    created=datetime.today().strftime('%d_%m_%Y'),
                    pages=maxpages,
                    data=csvfile.split('/')[-1],
                    user=session.get('user'))
    db.session.add(record)
    db.session.commit()

    return jsonify(scrapped_data)
from scrapper import Scrapper

q = Scrapper("https://www.cpttrevano.ti.ch/orario/invite?invite=true")
q.cercaOrarioAule("417 (A-413)")
Example #30
from scrapper import Scrapper
import sys
import time

try:
    scrapper_obj = Scrapper()
    print("Created OBJ")
    scrapper_obj.go_to_page(1)
    driver = scrapper_obj.get_driver()
except Exception as errmsg:
    print("Error: {}".format(errmsg))
    sys.exit(1)
company_data_list = []
count = 0
# =================================================
# Custom code to extract data
# =================================================
table_data = driver.find_elements_by_class_name(
    "zp_3UsOq")  # for the entire row data
for row_data in table_data:
    count += 1
    print("Count : {}".format(count))
    company_data_dict = {
        "Company_Name": "",
        "Employee_Headcount": "",
        "Industry_Sector": "",
        "Linkedin_URL": "",
        "FB": "",
        "Twitter": "",
        "Website": ""
    }