Example #1
    def post(self):
        data = request.json
        logger.info(f'Register Post data {data} [{type(data)}]')
        if data is None:
            return {'status': 'fail', 'message': 'No data passed'}, 400

        try:
            user = User.query.filter(
                or_(User.email == data.get('email'),
                    User.username == data.get('username'))).first()
            if user:
                logger.info(f"Resister found pre-existing User: {user}")
                return {
                    'status': 'fail',
                    'message': 'Username/Email already exists!'
                }, 401
            user = User(
                email=data.get('email'),
                username=data.get('username'),
                password=data.get('password'),
            )
            db.session.add(user)
            db.session.commit()
            auth_token = user.encode_auth_token(user.id)
            return {
                'status': 'success',
                'message': 'Successfully registered',
                'auth_token': auth_token.decode()
            }, 201
        except Exception as e:
            logger.error(e)
            return {
                'status': 'fail',
                'message': 'An error has occurred',
            }, 401
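
For reference, a client call to this register handler could look like the following sketch. The URL path and port are assumptions (the actual route depends on how the resource is registered with the API); the JSON keys mirror the fields read by the handler above.

import requests

# Hypothetical endpoint; adjust host, port and path to the actual API setup.
resp = requests.post(
    "http://localhost:5000/register",
    json={"email": "a@example.com", "username": "alice", "password": "secret"},
)
print(resp.status_code, resp.json())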
Example #2
 def parse(self, response):
     logger.info(f'parsed url {response.url}')
     # SELENIUM DRIVER
     # self.driver.get(response.url)
     # self.driver.implicitly_wait(3)
     # Append data
     self.store = self.store.append({'c': 0, 'd': 2}, ignore_index=True)
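
Note that DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; on newer pandas versions the line above can be replaced with the pd.concat equivalent sketched below (pd being the usual pandas import).

     # pandas >= 2.0 equivalent of the deprecated DataFrame.append call
     self.store = pd.concat([self.store, pd.DataFrame([{'c': 0, 'd': 2}])],
                            ignore_index=True)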
Example #3
def init_db(conn: Connection) -> None:
    """
    Configures the target schema in which the tweets data will be stored,
    creates the schema and the table if not existing yet

    :param conn: SQLAlchemy connection object
    """

    logger.info(f"{Fore.YELLOW}Initializing database ...{Style.RESET_ALL}")

    # Create specified schema if not exists
    if not conn.dialect.has_schema(conn, schema_name):
        logger.info(
            f"{Fore.YELLOW}Schema {schema_name} does not exist, creating it ...{Style.RESET_ALL}"
        )
        conn.execute(schema.CreateSchema(schema_name))
        logger.info(
            f"{Fore.GREEN}Schema {schema_name} successfully created !{Style.RESET_ALL}"
        )
    else:
        logger.info(
            f"{Fore.GREEN}Schema {schema_name} was found, continuing database initialization "
            f"...{Style.RESET_ALL}")

    # Create tables
    Base.metadata.create_all(bind=conn)

    logger.info(
        f"{Fore.GREEN}Schema {schema_name} successfully configured !{Style.RESET_ALL}"
    )
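
A minimal sketch of how init_db might be invoked, assuming a SQLAlchemy 1.x-style engine (which the conn.dialect.has_schema(conn, ...) call above suggests); the connection URL below is a placeholder, not from the source.

from sqlalchemy import create_engine

# Placeholder connection string; replace with the real database URL.
engine = create_engine("postgresql://user:password@localhost:5432/tweets")
with engine.connect() as conn:
    init_db(conn)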
Example #4
    def __init__(self, USER, HOST, DATABASE, PASSWORD):

        self.USER = USER
        self.HOST = HOST
        self.DATABASE = DATABASE
        self.PASSWORD = PASSWORD

        try:
            conn = mysql.connector.connect(
                user=USER,
                host=HOST,
                database=DATABASE,
                password=PASSWORD,
            )
            self.conn = conn
            logger.info('database connection successful')
            print('Connection success')

        except mysql.connector.Error as err:

            if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                logger.error('Something is wrong with the user name or password')
                print('Something is wrong with the user name or password')

            elif err.errno == errorcode.ER_BAD_DB_ERROR:
                logger.error('database does not exist')
                print('database does not exist')

            else:
                logger.error(err)
                print(err)
Example #5
def csv_load_to_db(filename, destination_folder, connection):
    """
    parse csv file and execute query to load into database.
    
    Arg:
        1. filename = name of csv file
        2. destination_folder= downloaded files directory
        3. connection = connection(HOST,HOST,DATABASE,PASSWORD)
    
    """
    csv_file = open(destination_folder + filename)

    count_header, count_row = 0, 0

    for row in islice(csv_file, 50001):  #    for row in islice(fl,1000):
        if count_header < 1:
            columns = row.rstrip().split(',')
            count_header += 1

            print(columns)
        else:

            val = row.rstrip().split(',')

            dt1 = datetime.strptime(val[5], '%m/%d/%Y').date()
            dt2 = datetime.strptime(val[7], '%m/%d/%Y').date()

            val[5] = dt1
            val[7] = dt2

            count_row += 1

            # print(val)
            ### This is the part where the parsed CSV rows are inserted into the target database.
            ### Rather than writing new connection code to execute the query, methods from the
            ### connection module are called to do the job. Since this is called from main.py,
            ### there is no need to import the connection module here.

            params = val

            insert_sql = '''INSERT INTO sales ({},{}, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`,`{}`, `{}`, `{}`, `{}`, `{}`, `{}`)
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''.format(
                *columns)
            # Iterable unpacking: * unpacks a list/tuple, ** unpacks a dict

            os.system('clear')
            print('inserting data to database')

            connection.execute_query(insert_sql, params)

            connection.commit()

    logger.info("{}'s rows loaded into database".format(count_row))
Example #6
 def __init__(self, **kwargs):
     # Instantiate the Chrome Selenium driver; the start URLs feature an 'infinite scroll'
     # behavior that needs to be triggered to view all listed shoes
     self.store = kwargs.get('store', pd.DataFrame())
     self.file_path = kwargs.get(
         'file_path',
         path.join(
             project_path,
             f"results/{self.__class__.__name__}-{datetime.now().strftime('%y-%m-%d-%H-%M')}"
         ))
     if kwargs.get('use_selenium'):
         self.driver = get_driver()
     logger.info(
         f'{TemplateSelSpider.name} init, store: {self.store}, file_path: {self.file_path}'
     )
     super().__init__()
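
The comment above notes that the start URLs feature an 'infinite scroll' behavior that has to be triggered. A common way to do that with a Selenium driver, shown here as a sketch (the helper name is an assumption, not part of the source), is to keep scrolling until the page height stops growing.

import time


def scroll_to_bottom(driver, pause=2.0):
    """Scroll down until document.body.scrollHeight stops increasing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height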
Example #7
 def post(self):
     data = request.json
     logger.info(f"Like post data[{type(data)}]: {data}")
     like = dbLike.query.filter(
         dbLike.user_id == data['user_id'],
         dbLike.post_id == data['post_id'],
     ).first()
     if not like:
         like = dbLike(user_id=data['user_id'],
                       post_id=data['post_id'],
                       value=data['value'])
     else:
         like.value = data['value']
     db.session.add(like)
     db.session.commit()
     return {
         'status': 'success',
          'message': 'like recorded',
     }, 200
Example #8
	def get(self):
		data = request.args
		logger.info(f"Post get data[{type(data)}]: {data}")
		try:
			if 'id' in data:
				logger.info('REQUEST BY ID')
				posts = [dbPost.query.get(data['id']),]
			elif 'feed' in data:
				feed_query = json.loads(data['feed'])
				posts = [post for _, post in dbUserFollow.query.filter(
					dbUserFollow.follower_id == feed_query['userId']
				).join(
					dbPost, dbUserFollow.followed_id == dbPost.author_id
				).add_entity(
					dbPost
				).all()]
			else:
				posts = dbPost.query.join(
					dbUser, dbUser.id == dbPost.author_id
				).filter_by(
					# **{'id': data.get('id')}
				).add_entity(
					dbUser
				)
				if data:
					posts = posts.filter_by(
						**{'id': data.get('author_id')}
					)
				posts = [post for post, user in posts.all()]
			posts = [post.resp_dict() for post in posts]
			return {
				'status': 'success',
				'message': 'post retrieved',
				'posts': posts
			}, 200
		except Exception as e:
			logger.error(e)
			return {
					'status': 'fail',
					'message': 'An error has occurred',
				}, 401
Example #9
	def post(self):
		data = request.json
		logger.info(f'request data [{type(data)}]: {data}')
		email_body = f"FREELANCE SITE INQUERY:\n" \
		             f"name: {data['name']}\n" \
		             f"email: {data['email']}\n" + \
		             f"_"*12 + \
		             f"\n{data['message']}"
		msg = Message(
			subject="Freelance Site Inquery",
		    body=email_body,
		    sender=data['email'],
		    recipients=["*****@*****.**"]
		)
		mail.send(msg)
		response = {
			'status': 200,
			'body': 'Success',
		}
		logger.info(f'response: {response}')
		return response
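
The Message and mail objects above come from Flask-Mail; a typical setup, sketched here with placeholder SMTP settings (the server, credentials and app wiring are assumptions, not from the source), looks like this.

from flask import Flask
from flask_mail import Mail

app = Flask(__name__)
app.config.update(
    MAIL_SERVER="smtp.example.com",    # placeholder SMTP host
    MAIL_PORT=587,
    MAIL_USE_TLS=True,
    MAIL_USERNAME="user@example.com",  # placeholder credentials
    MAIL_PASSWORD="app-password",
)
mail = Mail(app)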
Example #10
def train(experiment, parameters, method, joint, run_on_test=False):
    experiment_summary = get_experiment_summary(parameters.embedding_size,
                                                parameters.neg_ratio,
                                                parameters.batch_size,
                                                parameters.learning_rate,
                                                parameters.loss_ratio,
                                                parameters.type_ratios)

    module = create_module(experiment, method, joint, parameters)

    logger.info(" \n------------------ \n")
    logger.info("Running model with:\n")
    logger.info(experiment_summary)

    trainer = Trainer(module)
    valid_result = trainer.fit(experiment, parameters)

    print(experiment_summary)
    print_results('validation set', valid_result)

    if run_on_test:
        test_results = trainer.test(experiment, parameters.checkpoint_file)
        print_results('test set', test_results)
    else:
        test_results = None

    return valid_result, test_results
Example #11
def check_dataset(conn):
    '''
    If the dataset does not exist or needs updating, execute the extraction function and
    return the ``str`` 'ready'; otherwise return the ``str`` 'up to date'.
    '''
    client = bq.Client()
    rows = client.query('select count(*) from project_four.sales')
    mysql_total_rows = check_database(conn)
    try:
        if mysql_total_rows is None:
            print('something is wrong with the database')
        else:
            for item in rows:
                dataset_total_rows = item[0]
                if dataset_total_rows == mysql_total_rows:
                    print('bigquery_is_up_to_date')
                    logger.info(
                        'bigquery is up to date, extraction process is skipped'
                    )
                    return 'up to date'
                elif dataset_total_rows < mysql_total_rows:
                    print('bq dataset is not up to date with mysql')
                    logger.info(
                        'bq dataset is not up to date with mysql, begin extraction'
                    )
                    mysql_to_pq(conn)
                    return 'ready'
    except GoogleAPIError:
        # error_result['reason'] is expected to be 'notFound' (the table is missing in BigQuery)
        print(rows.error_result)
        logger.info('{}, new dataset created and begin extraction'.format(
            rows.error_result))
        mysql_to_pq(conn)
        return 'ready'
Example #12
 def post(self):
     auth_header = request.headers.get('Authorization')
     if auth_header:
         auth_token = auth_header.split(" ")[1]
         resp = User.decode_auth_token(auth_token)
         if isinstance(resp, str):
             return {'status': 'fail', 'message': resp}, 401
         else:
             data = request.json
             logger.info(f"Comment post data[{type(data)}]: {data}")
             if data is None:
                 return {'status': 'fail', 'message': 'No data passed'}, 400
             else:
                 try:
                     post_id = data['postId']
                     author_id = data['authorId']
                     comment = data['comment']
                     post = dbPost.query.filter(
                         dbPost.id == post_id).first()
                     new_comment = dbComment(post_id=post_id,
                                             author_id=author_id,
                                             body=comment)
                     post.comments.append(new_comment)
                     db.session.add(new_comment)
                     db.session.add(post)
                     db.session.commit()
                     return {
                         'status': 'success',
                         'message': 'comment submitted',
                     }, 200
                 except Exception as e:
                     logger.error(e)
                     return {
                         'status': 'fail',
                         'message': 'An error has occurred',
                     }, 401
     else:
         return {'status': 'fail', 'message': 'Invalid auth provided'}, 401
Example #13
	def post(self):
		data = request.form
		logger.info(f"/user post data[{type(data)}]: {data}")
		user = dbUser.query.filter(
			dbUser.id == data['userId']
		).first()
		if 'aviFile' in request.files.keys():
			avi_file = request.files['aviFile']
			avi_s3_name = f"user-{data['userId']}-avi"
			s3.upload_file(
				avi_file, s3.bucket_name,
				object_name=avi_s3_name
			)
			user.avi_s3_name = avi_s3_name
		if 'bio' in data.keys():
			user.bio = data["bio"]
		db.session.add(user)
		db.session.commit()
		return {
			'status': 'success',
			'message': 'profile updated',
			'user': user.resp_dict()
		}, 200
Example #14
	def post(self):
		data = request.form
		logger.info(f"Post post data[{type(data)}]: {data}")
		user_id = data['user_id']
		s3_name = f"{user_id}-{data['title'].replace(' ', '-')}-{str(uuid.uuid4().hex)}.png"
		image_file = request.files['image']
		# TODO: remove and change image upload to frontend?
		#  Will have to deal with cloudfront at some point anyways...
		s3.upload_file(
			image_file, s3.bucket_name,
			object_name=s3_name
		)
		# s3_url = f"https://{s3.bucket_name}.s3.amazonaws.com/{s3_name}"
		post = dbPost(
			author_id=user_id,
			title=data['title'],
			desc=data.get('description'),
			s3_name=s3_name,
			# s3_url=s3_url
		)
		db.session.add(post)
		tags = request.form['tags'].split(',')
		for tag in tags:
			db_tag = dbTag.query.filter(
				dbTag.name == tag
			).first()
			if not db_tag:
				db_tag = dbTag(name=tag)
			post.tags.append(db_tag)
			db.session.add(db_tag)
		db.session.commit()
		return {
			'status': 'success',
			'message': 'post uploaded',
			# TODO: implement to json function in post database class
			'post': post.resp_dict()
		}, 200
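
The s3 object used here and in Example #13 behaves like a small helper around boto3 rather than a raw client: request.files values are file-like objects, so upload_fileobj is the matching boto3 call. A hypothetical sketch of such a helper (class and attribute names are assumptions) could be:

import boto3


class S3Helper:
    """Hypothetical helper matching the s3.upload_file(...) calls above."""

    def __init__(self, bucket_name):
        self.bucket_name = bucket_name
        self.client = boto3.client("s3")

    def upload_file(self, file_obj, bucket_name, object_name=None):
        # Stream the uploaded file object straight to S3 under the given key.
        self.client.upload_fileobj(file_obj, bucket_name, object_name)


s3 = S3Helper(bucket_name="my-bucket")  # placeholder bucket name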
Example #15
    def post(self):
        data = request.json
        logger.info(f'Login Post data {data} [{type(data)}]')
        if data is None:
            return {'status': 'fail', 'message': 'No data passed'}, 400

        try:
            # user = User.query.filter_by(
            # 	email=data.get('email'),
            # 	username=data.get('username')
            # ).first()
            user = User.query.filter(
                (User.username == data.get('username'))
                | (User.email == data.get('email'))).first()
            logger.info(f'Login Post user query result: {user}')
            if user and bcrypt.check_password_hash(user.password,
                                                   data.get('password')):
                auth_token = user.encode_auth_token(user.id)
                if auth_token:
                    return {
                        'status': 'success',
                        'message': 'Success',
                        'user': user.resp_dict(include_private=True),
                        # 'user': user.resp_dict(),
                        'auth_token': auth_token.decode()
                    }, 200
            else:
                return {
                    'status': 'fail',
                    'message': 'Username or password is invalid!'
                }, 401
        except Exception as e:
            logger.error(e)
            return {
                'status': 'fail',
                'message': 'An error has occurred',
            }, 401
Example #16
	def get(self):
		authenticated = check_auth(request)
		try:
			data = dict(request.args)
			logger.info(f"User get data[{type(data)}]: {data}")
			user = dbUser.query.filter_by(**data).first()
			logger.warning(f'authenticated: {authenticated}')
			if isinstance(authenticated, int):
				same_user = authenticated == user.id
				return {
					'status': 'success',
					'user': user.resp_dict(include_private=same_user),
				}, 200
			else:
				return {
					'status': 'success',
					'user': user.resp_dict()
				}, 200
		except Exception as e:
			logger.error(e)
			return {
				'status': 'fail',
				'message': 'An error has occurred',
			}, 401
Example #17
def main():
	'''
	Starts harvest script
	'''
	logger.info('launching main')
	# tz = pytz.timezone('America/Los_Angeles')
	tz = pytz.timezone('EST')
	start = datetime.datetime.now(tz=tz)

	if config['use_proxy']:
		update_proxies()

	# launching crawlers
	store = Store()
	process = CrawlerProcess(custom_settings)
	for spider in spiders:
		logger.info(f'starting {spider.name}')
		process.crawl(spider, store=store)
	process.start()
	process.join()

	end = datetime.datetime.now(tz=tz)
	logger.info(f"runtime: {end - start}")
Example #18
        return 'ready'


if __name__ == '__main__':

    with open('config.json', 'r') as json_file:
        config = json.load(json_file)

    conn = mysql.connector.connect(
        user=config['mysql']['USER'],  # establish target database
        host=config['mysql']['HOST'],
        database=config['mysql']['DATABASE'],
        password=config['mysql']['PASSWORD'])
    job = check_dataset(conn)
    if job == 'ready':
        load.load_blob(bucket_name=func_param['bucket_name'],
                       destination_blob=func_param['dest_blob_transform'],
                       source_file_path=func_param['source_transform'])
        logger.info('parquet file successfully loaded into cloud storage')

        load.pq_gcs_to_bigquery(uri='gs://' + func_param['bucket_name'] + '/' +
                                func_param['dest_blob_transform'],
                                dataset='project_four',
                                table_id='sales',
                                write_disposition='WRITE_APPEND')
        logger.info('dataset updated')
    else:
        logger.info('dataset is up to date')
Example #19
def mysql_to_pq(conn,
                source_transform=func_param['source_transform'],
                name_of_dataset='project_four',
                by_row_batch=5):
    '''
    Extract the MySQL database and save it into a local parquet file ``tmp/sales-date.pq``.
    This function takes the last row of the BigQuery dataset and compares it against the
    current MySQL database to avoid duplication, so only new data is extracted and loaded
    from MySQL to BigQuery. If the dataset does not exist, it will be created using the
    given name.

    Args:
        1. source_transform = 'path/local/file.pq'

        2. by_row_batch = number of rows to extract ``int``

    Return:
        ``str`` of the local parquet file path
    '''
    client = bq.Client()
    row_id = client.query(
        'select id from project_four.sales order by id desc limit 1')
    try:
        for i in row_id:
            last_row_id = i[0]
            print('last row in dataset is {}'.format(i[0]))
    except GoogleAPIError:
        # error_result['reason'] is expected to be 'notFound' (no dataset/table in BigQuery yet)
        last_row_id = 0
        print('no dataset.table')
        client.create_dataset(name_of_dataset, exists_ok=True)
        print('new dataset, {} created'.format(name_of_dataset))

    cur = conn.cursor()  # MySQL connection
    cur.execute('use sales_records')
    cur.execute('select * from sales where id>={} and id<={}'.format(
        last_row_id + 1, last_row_id + by_row_batch))

    list_row = cur.fetchall()
    rows_of_extracted_mysql = []
    for i in list_row:
        rows_of_extracted_mysql.append(list(i))
    print('extracting from mysql')
    df = pd.DataFrame(rows_of_extracted_mysql,
                      columns=[
                          'id', 'region', 'country', 'item_type',
                          'sales_channel', 'Order Priority', 'order_date',
                          'order_id', 'ship_date', 'units_sold', 'unit_price',
                          'unit_cost', 'total_revenue', 'total_cost',
                          'total_profit'
                      ])

    table = pa.Table.from_pandas(df)
    # df.to_csv('test.csv')
    pq.write_table(table, source_transform)
    # pd.to_parquet is not working for some reason (segmentation fault);
    # for the time being the pyarrow library is used to create the parquet file

    # df.to_parquet(source_transform, engine='fastparquet')
    logger.info('id {} to {} being extracted'.format(
        last_row_id + 1, last_row_id + by_row_batch))
    print(
        'data extracted from id {} to {}, {} file ready to upload to cloudstorage'
        .format(last_row_id + 1, last_row_id + by_row_batch,
                source_transform))