Example #1
def getDataFromLast7Dayz(symbol):
    isCreated = dbop.createTable(symbol)
    print "getting data from last 7 days for ", symbol
    msg = "getting data from last 7 days for " + symbol
    Log(msg)
    sc = Scrapper()
    result = sc.equityScrapper(symbol, selected=False, timeout=100)
Example #2
def scrap_and_upload(vehicle_category):
    """
    """
    if vehicle_category is None:
        sys.exit("vehicle category cannot be null")
    vehicles = load_scrapping_links(vehicle_category)

    start_time = datetime.utcnow().strftime("%Y-%m-%d")
    create_directory(f"tmp")
    create_directory(f"tmp/{vehicle_category}")
    file_path = f"{DIR_NAME}/tmp/{vehicle_category}/{start_time}.csv"

    if os.path.exists(file_path):
        header = None
    else:
        header = ["Make", "Model", "Trim", "Year", "Mileage", "Price"]

    for make, model, urls in vehicles:
        for website_name, link in urls.items():
            if website_name == 'cg':
                urlsuffix = "#resultsPage="
            elif website_name == 'ed':
                urlsuffix = "?pagenumber="
            else:
                # Skip unknown sources so urlsuffix is never left undefined.
                continue
            site_scrapper = Scrapper(website_name, link, urlsuffix, make,
                                     model, vehicle_category)
            site_scrapper.fetch_batch(NUM_OF_PAGES)
            if site_scrapper.listings:
                with open(file_path, "a") as csvfile:
                    write(csvfile, site_scrapper.listings, header)
                    header = None

    if os.path.exists(file_path):
        s3_client = boto3.client('s3')
        s3_client.upload_file(file_path, DESTINATION_BUCKET,
                              f"{vehicle_category}/{start_time}.csv")
Example #3
def count(id):
    logger.info(f'Adding task for id: {id}')
    session = Session()
    task = session.query(Tasks).filter_by(id=id).first()
    res = Results(address=task.address, words_count=0, http_status_code=0)

    try:
        scrpr = Scrapper(task.address)
    except Exception:  # Scrapper construction may fail on a bad address
        scrpr = None

    if scrpr:
        err = scrpr.get_page()
        if not err:
            task.http_status_code, matches = scrpr.count_matches()
            task.task_status = 'FINISHED'
            res = Results(address=task.address,
                          words_count=matches,
                          http_status_code=task.http_status_code)
        else:
            print(err)

    session.add(res)
    session.commit()
    logger.info(task)
    logger.info(res)
Example #4
def aule():
    aula = request.args.get('aula')
    settimanaDopo = request.args.get('settimanaDopo')

    # Convert the query-string value to a boolean
    settimanaDopo = settimanaDopo == 'True'
    scrapper = Scrapper()
    dati = scrapper.cerca_orario_aule(aula, settimanaDopo)
    if dati is None:
        return "SETTIMANA DI VACANZA"
    ris = "Aula " + aula + "<br>"
    for giorni in dati:
        for giorno in giorni.values():
            if isinstance(giorno, str):
                ris += giorno + " "
            else:
                for materie in giorno:
                    for materia in materie.values():
                        if isinstance(materia, str):
                            ris += materia + " "
                        else:
                            for classe in materia:
                                ris += classe + " "
                    ris += "<br>"
            ris += "<br>"
    return ris
Example #5
class Main:
    def __init__(self):
        ap = argparse.ArgumentParser()
        ap.add_argument("-train",
                        "--train",
                        required=True,
                        help="whether to train a model or not")
        self.args = vars(ap.parse_args())

        self.scrapper = Scrapper()
        # self.dataManager = DataManager()
        self.filterImage = FilterImage()
        self.faceRecognition = FaceRecognition()
        self.emotionDetection = EmotionDetection()
        self.model = Model()

    def run(self):
        self.scrapper.scrape()
        # self.dataManager.manage()
        roi_face, face_image = self.faceRecognition.recognizeFace()
        self.filterImage.blurImage(roi_face)
        self.faceRecognition.checkFaceRatio()
        self.emotionDetection.detect()
        self.faceRecognition.recognizeMouth()

        if self.args['train'] == '1':
            save_weights_name = input("Enter the name for weights: ")
            self.model.train(save_weights_name, save_weights='TRUE')
        self.model.load_weights(config.WEIGHT_NAME)
        self.model.predict()
        pass
Example #6
def extract_acts():
    scrapper = Scrapper(constants.base_url)
    # When the URL is requested without data, the search form is retrieved.
    home_page = scrapper.request({})
    acts_scrapper = ActsParser(home_page)
    acts_scrapper.parse()
    scrapper.save_data(acts_scrapper.acts, "acts.json")
Example #7
def olx_bot():
    scrapper = Scrapper()

    if Scrapper.isExecution:
        print('The program is already running')
    else:
        scrapper.start()
        print('The program is starting')
Example #8
def olx_bot():
    scrapper = Scrapper()

    if Scrapper.isExecution:
        return render_template('running.html')
    else:
        scrapper.start()
        return render_template('sucess.html')
Example #9
def fetch_data(url, callback):
    try:
        r = yield gen.Task(http_client.fetch, url)
        print "done"
        callback(Scrapper.Blog(url=url, content=r.body[:100]))
    except Exception:
        print "Something went wrong"
        callback(Scrapper.Blog())
Example #10
    def run(self):

        scapper = Scrapper()
        linklist = scapper.loadLink(self.rooturl)

        dbr = DB()
        dbr.rawlinks_save(linklist)

        pass
Example #11
    def run(self):
        scrapper = Scrapper()
        global folder_path
        global test_df
        test_df = generate_test_data(self.username, self.threshold)
        folder_path = scrapper.dowload_data(self.username, self.threshold)
        #user_account="skyemcalpine"
        folder_path = folder_path.replace("\\", "/")
        print(folder_path)
        self.signals.result.emit(True)
Example #12
    def initialise_stats(self):
        if self.has_scrapper_links and self.has_valid_predictions:
            self.predictions = Prediction().initialise_prediction().get_all_prediction()
            self.driver_standings = Scrapper().initialise_links().scrape_driver()
            self.team_standings = Scrapper().initialise_links().scrape_constructor()
            return self
        else:
            print("Links and predictions not initialised properly")
            return self
Example #13
    def get_table_info(self):
        details_movie = None
        try:
            if self.download_url is not None:
                self.sc = Scrapper(self.download_url)
                details_movie = self.sc.get_movie_details()

        except Exception as e:
            print("Error initializing the Scrapper: " + e)

        if details_movie is not None:
            return details_movie
Example #14
def main():
    """
    Instancie mes classes Requester et Scrapper, effectue une première requete puis transmet la réponse au scrapper
    """

    requester = Requester()
    scrapper = Scrapper(requester)

    requested_response = requester.html_requester(constants.URL)
    category_list = scrapper.get_category_list(requested_response)

    scrapper.scrap_books_in_category(category_list, scrapper)
Example #15
    def __init__(self):
        ap = argparse.ArgumentParser()
        ap.add_argument("-train",
                        "--train",
                        required=True,
                        help="whether to train a model or not")
        self.args = vars(ap.parse_args())

        self.scrapper = Scrapper()
        # self.dataManager = DataManager()
        self.filterImage = FilterImage()
        self.faceRecognition = FaceRecognition()
        self.emotionDetection = EmotionDetection()
        self.model = Model()
Example #16
    def test(self, username, threshold):
        scrapper = Scrapper()
        folder_path = scrapper.dowload_data(username, threshold)
        dataProcessor = DataProcessor(folder_path)
        data = dataProcessor.create_dataframe_input()
        #print(data)
        class_names = ['food and drink', 'entertainment', 'business and industry', 'family and relationships', 'fitness and wellness', 'hobbies and activities', 'shopping and  fashion', 'sports and outdoors', 'technology']
        model_path = "./last_cnn_model.h5"
        cnnModel = CnnModel(class_names, model_path, data)
        model = cnnModel.load_model()
        test_generator = cnnModel.create_generator()
        prediction = cnnModel.getPrediction(model, test_generator)
        result = np.sum(prediction, axis=0)
        result *= (1 / len(prediction))
        return result
Example #17
def create_app():
    app = Flask(__name__)
    CORS(app)

    from blueprints import npcs_blueprint
    from blueprints import gears_blueprint
    from blueprints import runes_blueprint
    from blueprints import biomes_blueprint
    from blueprints import bosses_blueprint
    from blueprints import outfits_blueprint
    from blueprints import pickups_blueprint
    from blueprints import enemies_blueprint
    from blueprints import mutations_blueprint
    from blueprints import achievements_blueprint

    app.register_blueprint(npcs_blueprint.bp)
    app.register_blueprint(gears_blueprint.bp)
    app.register_blueprint(runes_blueprint.bp)
    app.register_blueprint(biomes_blueprint.bp)
    app.register_blueprint(bosses_blueprint.bp)
    app.register_blueprint(outfits_blueprint.bp)
    app.register_blueprint(pickups_blueprint.bp)
    app.register_blueprint(enemies_blueprint.bp)
    app.register_blueprint(mutations_blueprint.bp)
    app.register_blueprint(achievements_blueprint.bp)

    app.scrapper_manager = Scrapper()

    @app.errorhandler(404)
    def route_not_found(error):
        app.logger.error(error)
        return 'Route not found.', 404

    return app
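For context, a minimal local entry point for the factory above could look like the following sketch; only `create_app` itself comes from the example, the rest is an assumption about how the module is run:

if __name__ == '__main__':
    # Hypothetical entry point: build the app via the factory and serve it locally.
    app = create_app()
    app.run(debug=True)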
Example #18
def scrapeURL():
    data = request.json
    url = data['url']
    response = dict()
    scrapper = None

    if urlExists(url, timeout=20, check_is_image=False):
        if isInCustomSites(url):
            scrapper = CustomScrapper()
            response['custom'] = True
        else:
            scrapper = Scrapper()
            response['custom'] = False

        image_or_data_urls = scrapper.scrape(url)
        if len(image_or_data_urls) > 0:
            response['success'] = True
            response['output'] = image_or_data_urls
            response['stats'] = scrapper.stats
        else:
            response['success'] = False
            response['output'] = "NO_IMAGES_FOUND"
    else:
        response['success'] = False
        response['output'] = "INVALID_URL"

    return response
Example #19
def start_scraper():
    global SELENIUM
    global FILE_NAME
    global TEST
    kwargs = {
        'selenium': SELENIUM,
        'url': FILE_NAME,
        'test': TEST,
        'skip_after': 0,
        'skip_before': 0,
        'export': 'json'
    }
    if not TEST:
        print('test not enabled...')
        Scrapper(**kwargs).crawl()
    else:
        Scrapper.test()
Example #20
def get():
    register_no = request.args.get('register_no')
    dob = request.args.get('dob')

    if register_no is None or dob is None:
        resp = make_response(json.dumps({'error': 'Request parameters are not in correct format.'}))
    else:
        if not check_regno(register_no) and not check_dob(dob):
            resp = make_response(json.dumps({'error': 'Invalid Register Number and Date of Birth.'}))
        elif not check_regno(register_no):
            resp = make_response(json.dumps({'error': 'Invalid Register Number.'}))
        elif not check_dob(dob):
            resp = make_response(json.dumps({'error': "Date of Birth is invalid."}))
        else:
            s = Scrapper(register_no, dob)
            json_data = s.get_json()
            resp = make_response(json_data)

    resp.mimetype = 'application/json'
    return resp
Example #21
def check_prices():
    users = session.query(User).all()
    scrapper = Scrapper()
    items = session.query(Item).all()
    for item in items:
        scrapper.go_to(item.link)
        price = scrapper.get_price()
        title = scrapper.get_title()
        if not item.title:
            item.title = title
            session.commit()
        if item.price:
            change_percentage = (abs(price - item.price) / item.price) * 100.0
            if change_percentage >= 3:
                item.price = price
                session.commit()
                markup = InlineKeyboardMarkup(
                    [InlineKeyboardButton('Check', url=item.link)])
                for u in users:
                    try:
                        bot.send_message(
                            u.tg_id,
                            '<code>{}</code> price changed'.format(title),
                            parse_mode=ParseMode.HTML,
                            reply_markup=markup)
                    except Exception as e:
                        config.logger.error(
                            'Error sending a message: {}'.format(e))
        else:
            item.price = price
            session.commit()
Example #22
def main():
	scrapper = Scrapper()
	merger = Merger()
	parser = Parser()
	client = MongoClient('localhost', 27017)
	db = client['Data']
	collection_socialmedia = db['socialmedia']

	#Begin real time collecting
	while True: 
		scrapper.scrap()	
		merger.main()
		parser.main()	
		sleep(3600)
		
		# Storing to MongoDB
		with open('/home/sartharion/Bureau/stage/POO/data.json', 'r') as f:
			file_data = json.load(f)
		collection_socialmedia.delete_many({})
		collection_socialmedia.insert_many(file_data)
	
	client.close()
Example #23
def communicate():
    transport = THTTPTornadoTransport()
    pfactory = TJSONProtocol.TJSONProtocolFactory()
    client = Scrapper.Client(transport, pfactory)

    futures = [client.scrape('http://google.com/') for i in xrange(100)]

    try:
        yield futures
    except Exception as e:
        print e

    io_loop.stop()
Example #24
def book_download(query, book_name):
    data = Scrapper(query).parse_data()
    # gets the book_name from the data and gets the direct download link for the book
    try:
        book = list(
            filter(lambda book: book['Book']['title'] == book_name, data))[0]
        direct_dl = DownloadFetcher(book).get_direct_download()
        return jsonify({'book': book, 'download': direct_dl}), 200

    except Exception as e:
        print(e)
        print(book_name)
        return f"Error specified book name not found for query = {query}", 404
Example #25
def getDataFromLastYears(symbol, historic=False):

    # get the year from which we have to start scraping.
    year = int(props.get("startYear"))

    # check for company's db or create it if not created already.
    isCreated = dbop.createTable(symbol, historic)

    # This loop forms two-month date ranges and scrapes data from startYear (e.g. 2000) through December of last year (e.g. 2017).
    while year < currentYear:
        startMonth = 1
        endMonth = startMonth + 1
        while endMonth < 13:
            if not historic:
                result = formDateAndCallScrapper(startMonth, endMonth, year)
            else:
                result = formDateAndCallScrapper(startMonth,
                                                 endMonth,
                                                 year,
                                                 historic=True)
            startMonth = endMonth + 1
            endMonth = startMonth + 1
        year += 1

    startDay = 1
    startMonth = 1
    endMonth = startMonth + 1
    limitMonth = int(datetime.datetime.now().strftime("%m"))  # Current month

    # This loop handles the remaining month(s) of the current year that could not form a full two-month window.
    while endMonth < limitMonth:
        if not historic:
            result = formDateAndCallScrapper(startMonth, endMonth, year)
        else:
            result = formDateAndCallScrapper(startMonth,
                                             endMonth,
                                             year,
                                             historic=True)
        startMonth = endMonth + 1
        endMonth = startMonth + 1

    if limitMonth - startMonth == 0 or limitMonth - startMonth == 1:
        startDate = "0" + str(startDay) + "-0" + str(startMonth) + "-" + str(
            year)
        endDate = str(datetime.datetime.now().strftime("%d-%m-%Y"))
        print "start - ", startDate, " to end - ", endDate
        msg = "start - " + startDate + " to end - " + endDate
        Log(msg)
        if not historic:
            sc = Scrapper()
            result = sc.equityScrapper(symbol,
                                       startDate,
                                       endDate,
                                       selected=True,
                                       timeout=100)
        else:
            sc = Scrapper(historic=True)
            result = sc.historicScrapper(startDate, endDate)
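The two loops above walk the calendar in two-month windows, as the comments describe. The following standalone sketch (not part of the original module; names are illustrative) shows the (startMonth, endMonth, year) windows generated for the completed years:

def month_windows(start_year, current_year):
    # Illustrative generator: yields the two-month windows scraped above,
    # e.g. (1, 2, 2000), (3, 4, 2000), ..., (11, 12, 2000) for each past year.
    for year in range(start_year, current_year):
        for start_month in range(1, 12, 2):
            yield start_month, start_month + 1, year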
Example #26
def create_recipe():
    json_data = request.get_json()
    url = json_data.get('url')
    type_recipe = json_data.get('typeRecipe')
    print(f'Creating entry \'{type_recipe}\' for url: \'{url}\'')

    if type_recipe is None:
        raise ValueError("typeRecipe is empty")
    if url is None:
        raise ValueError("URL is empty")

    recipe = mongo.add_recipe(
        Scrapper(url=url, type_recipe=type_recipe).scrap())
    return {'success': True, 'recipe': recipe}
Example #27
def Scrap():
    product = request.args.get('product')
    maxpages = request.args.get('max')
    website = request.args.get('website')

    if not maxpages:
        maxpages = 2
    print(product, maxpages)

    scrap = Scrapper()
    scrapped_data, csvfile = scrap.start(product,
                                         max=maxpages,
                                         website=website)

    record = Record(product=product,
                    created=datetime.today().strftime('%d_%m_%Y'),
                    pages=maxpages,
                    data=csvfile.split('/')[-1],
                    user=session.get('user'))
    db.session.add(record)
    db.session.commit()

    return jsonify(scrapped_data)
Example #28
def communicate():
    transport = TAMQPTornadoTransport()
    pfactory = TJSONProtocol.TJSONProtocolFactory()
    client = Scrapper.Client(transport, pfactory)

    yield gen.Task(transport.open)

    futures = [client.scrape('http://google.com/') for i in xrange(100)]

    yield futures

    client._transport.close()

    io_loop.stop()
Example #29
    def check_and_scrap_reviews(self, hotel_name, platforms):
        for platform in platforms:
            if platform == 'TA':
                data = self.read_csv_to_list(
                    "C:/Users/acfelk/Documents/IIT_Files/final year/FYP/fyp_workfiles/final_project/backend/drops/"
                    + hotel_name + "-tripadvisor.csv")

                if data is None:
                    # NOW CALL THE SCRAPPER TO SCRAP REVIEWS TO drops
                    scrapper = Scrapper()
                    scrapper.scrap_reviews(hotel_name, platform)

            if platform == 'BC':
                data = self.read_csv_to_list(
                    "C:/Users/acfelk/Documents/IIT_Files/final year/FYP/fyp_workfiles/final_project/backend/drops/"
                    + hotel_name + "-bookingscom.csv")

                if data is None:
                    # NOW CALL THE SCRAPPER TO SCRAP REVIEWS TO drops
                    scrapper = Scrapper()
                    scrapper.scrap_reviews(hotel_name, platform)
Example #30
def main(args):
    username = args.username
    cid = os.environ['SPOTIPY_CLIENT_ID']
    secret = os.environ['SPOTIPY_CLIENT_SECRET']
    redirect_uri = os.environ['SPOTIPY_REDIRECT_URI']
    content = args.content

    spoti = TrackExtractor(username, cid, secret, redirect_uri)
    sc = Scrapper()
    if content == 'all':
        ret_tracks, _, _ = spoti.all_tracks()
    elif content == 'playlists':
        ret_tracks, _, _ = spoti.tracks_in_all_playlists()
    elif content == 'saved_tracks':
        ret_tracks, _, _ = spoti.saved_tracks()
    else:
        print(
            'Wrong set of filter! Please enter one of [\'all\', \'playlists\',\'saved_tracks\']'
        )
Example #31
def formDateAndCallScrapper(startMonth, endMonth, year, historic=False):
    dates = dt.dateCreator(startMonth, endMonth, year)
    print "start - ", dates[0], " to end - ", dates[1]
    msg = "start - " + dates[0] + " to end - " + dates[1]
    Log(msg)
    if not historic:
        sc = Scrapper()
        return sc.equityScrapper(symbol,
                                 dates[0],
                                 dates[1],
                                 selected=True,
                                 timeout=100)
    else:
        sc = Scrapper(historic=True)
        return sc.historicScrapper(dates[0], dates[1])
Example #32
bay=["33.52694833905606,44.61786288710962","33.52779437548921,44.6187406119569","33.52844095177134,44.61952376688691","33.52912847311098,44.6203916749758","33.52941621924846,44.62153677469976","33.52955130148957,44.62233144707166","33.5300525893401,44.6227438007253","33.53074849762842,44.62332258304048","33.53171789286472,44.62402774084964","33.5324194892887,44.62470767274025","33.53284911288601,44.62509717000224",	"33.53319327158253,44.62545051188663",	"33.53388568626423,44.6257351767263",	"33.53436842166227,44.62606452934324",	"33.53505441166816,44.62632008064779",	"33.5353801786637,44.62659249116985",	"33.53630376612985,44.62625791333705",	"33.53648828981084,44.62599170149596",	"33.53610642979442,44.62526522473693",	"33.5357881089882,44.6248585430718",	"33.53500799910345,44.62426344134919",	"33.53465157533417,44.62368184867986",	"33.53431369292202,44.62296420957523",	"33.53382395865862,44.62231786828408",	"33.53333558808044,44.6216463315828",	"33.53246351873157,44.62075912758933",	"33.53167225920291,44.61991668370528",	"33.53115295169879,44.61933457914446",	"33.53057732703983,44.61855221942759",	"33.53004247447537,44.61794741478788",	"33.52952835400872,44.61738465609927",	"33.52893986428256,44.61685023094336",	"33.52844685348632,44.61674350283417",	"33.52770715079468,44.61675678646416","33.52693507427308,44.61711744970111","33.52694833905606,44.61786288710962"]
Bay = []

for coord in bay:
	lon, lat = coord.split(",")
	Bay.append(Coordinates(float(lat), float(lon)))

Field = Area(Bay)
Field.__start_point__()

caters = ["MOLODIZGNIY", "ADMIRAL LAZAREV", "SATURN", "ADMIRAL ISTOMIN", "V ADMIRAL KLOKACHEV", "NORD"]
Caters = []
for c in caters:
	Caters.append(Ship(c))

Scrap = Scrapper()

for cater in Caters:
	res = Scrap.scrape_ship(cater.name)
	if res is None:
		print cater.name, "Not found"
	else:
		v, cors, point = res
		print cater.name, "\t---\t", Field.__das_is_in__(point)
		print point
		print
	#print
#for x in Field.points:
#	print x.latitude, x.longitude

Example #33
    def OptimizeChromosome(self, chromosome):
        chromosomeQuality = dict()
        for gene in chromosome:
            chromosomeQuality[gene] = self.CalculateCachedLinkQuality(gene)
        return sorted(chromosomeQuality, key=chromosomeQuality.get, reverse=True)

    def Plot(self):
        raise NotImplementedError

    def Display(self):
        raise NotImplementedError

gsa = GSA()

sc = Scrapper(str(input("Enter search query: ")), 20)
urls = set(sc.getLinks())
urlDict = dict()
for index, url in enumerate(urls):
    urlDict[index] = url
gsa.genes = urlDict.keys()


for key, value in urlDict.iteritems():
    features = sc.getFeatures(value)
    gsa.bounceRate[key] = features[0]
    gsa.pageView[key] = features[1]
    gsa.time[key] = features[2]
    gsa.searchVisit[key] = features[3]
    gsa.linkIn[key] = features[4]
Example #34
    def result(self):
        self.lblResult.setText("")
        data = Scrapper(self.date.year(), self.date.month(), self.date.day())
        self.lblResult.setText(str(data.getPrice()))
Example #35
    def update_from_ais(self):
        scrapper = Scrapper()
        data = scrapper.scrape_ship(self.name)
        return self.update(data)
Example #36
import pickle
import sys
from functools import wraps


def retry(func):
    @wraps(func)
    def decorated(*args):
        result = func(*args)
        while not result:
            print("Retrying..")
            result = func(*args, retry=True)
        return result
    return decorated


if __name__ == "__main__":
    scrapper = Scrapper()
    nodes = set()
    nodes_sets = [pickle.load(open(filename, "rb")) for filename in sys.argv[1:]]
    nodes_info_filename = "all_nodes_info.pickle"

    try:
        nodes_info = pickle.load(open(nodes_info_filename, "rb"))
        processed = set(nodes_info.keys())
    except IOError:
        nodes_info = {}
        processed = set()

    for nodes_set in nodes_sets:
        for number, node in enumerate(nodes_set - processed):
            nodes_info[node] = scrapper.get_node_info(node)
            processed.add(node)
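The `retry` decorator defined above is not applied anywhere in this snippet. A hypothetical use, assuming the wrapped call returns a falsy value on failure and tolerates the extra `retry` keyword the decorator passes on later attempts, could be:

@retry
def fetch_node_info(node, retry=False):
    # Hypothetical wrapper: assumes get_node_info returns None (falsy) when a
    # lookup fails, which makes the decorator call it again with retry=True.
    return scrapper.get_node_info(node)

# nodes_info[node] = fetch_node_info(node)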