def index():
    """Serve the search form, or the movie list for the submitted year.

    A non-POST request renders ``index.html``. A POST request reads the year
    from the ``content`` form field, removes all whitespace from it, and
    looks in the ``movies_<year>`` collection of the ``imdb_scrapper``
    database. If that collection already holds documents they are rendered
    directly; otherwise the movies are scraped, written to
    ``./csv/movies_<year>.csv``, inserted into the collection and rendered.

    Returns:
        The rendered template, or the plain string ``'something is wrong'``
        when any step after reading the form field raises.
    """
    log_writer = logger.App_Logger()
    log_path = "logs/imdb_scraper-{}.txt".format(datetime.now().date())

    # The context manager closes the per-day log file on every exit path;
    # the handle used to be leaked on each request.
    with open(log_path, 'a+') as file_object:
        if request.method != 'POST':
            return render_template('index.html')

        log_writer.log(file_object, 'Getting the year')
        year = "".join(request.form['content'].split())
        log_writer.log(file_object, 'received the year {}'.format(year))
        # NOTE(review): `year` is user input and ends up in a collection name
        # and a file path below. Consider validating it (e.g. digits only)
        # before use.

        dbConn = None
        try:
            log_writer.log(file_object, 'connecting to mongo server')
            dbConn = MongoClient("mongodb://localhost:27017/")
            log_writer.log(file_object, 'connecting to db')
            db = dbConn['imdb_scrapper']
            log_writer.log(
                file_object,
                'creating/retrieving collection {}'.format(year))
            collection = db['movies_{}'.format(year)]

            # Materialise the cursor instead of calling Cursor.count(), which
            # no longer exists in PyMongo 4. A list also stays usable after
            # the client is closed in the `finally` clause.
            movies = list(collection.find({}))
            if movies:
                log_writer.log(file_object, 'showing results from db')
                return render_template('results.html', movies=movies)

            log_writer.log(file_object, 'callign scrape function')
            movies = scrapper.scrape(year, log_writer, file_object)
            filename = 'movies_{}.csv'.format(year)

            log_writer.log(
                file_object, 'creating dataframe and writing to CSV file')
            # Built outside the CSV try-block: the frame is needed for the
            # insert below even when writing the CSV file fails.
            df = pd.DataFrame(movies)
            try:
                df.to_csv('./csv/{}'.format(filename))
            except Exception as e:
                log_writer.log(
                    file_object,
                    "Exception occurred while creating csv file: {}".format(e))

            # Best effort: sweep any CSV left in the working directory into
            # the csv folder.
            try:
                for f in os.listdir():
                    if f.endswith('.csv'):
                        shutil.move(f, 'csv')
            except Exception as e:
                log_writer.log(
                    file_object,
                    "Exception occurred while moving csv file: {}".format(e))

            log_writer.log(
                file_object, 'inserting into collection {}'.format(year))
            collection.insert_many(df.to_dict('records'))
            # NOTE(review): the last scraped entry is dropped from the page,
            # as in the original code - confirm this is intended.
            return render_template(
                'results.html', movies=movies[0:(len(movies) - 1)])
        except Exception as e:
            log_writer.log(file_object, "Exception occurred : {}".format(e))
            return 'something is wrong'
        finally:
            # Release the connection pool created for this request.
            if dbConn is not None:
                dbConn.close()
x_pca = pca.transform(data) # Make Dataframe with pca data columns = ['PC' + str(i) for i in range(1, Exp_var + 1)] pca_data = pd.DataFrame(data=x_pca, columns=columns) self.logger_object.log( self.file_object, "C:: the PCA method of the DataPreprocessor class") return pca_data except Exception as e: self.logger_object.log( self.file_object, 'E : Exception occured in PCA method of the Data Preprocessor class. Exception message: ' + str(e)) self.logger_object.log( self.file_object, 'E : Unsuccessful. Exited the PCA method of the DataPreprocessor class' ) raise Exception() if __name__ == '__main__': log = logger.App_Logger() df = pd.read_csv('train.csv') file = open('log.txt', "a") print(df.dtypes) # Test your code by calling methods here.
def __init__(self, records):
    """Create the logger and Mongo connection helpers and keep the records.

    Args:
        records: Stored unchanged on ``self.records``.
    """
    app_logger = logger.App_Logger()
    mongo_connection = MongoConnection.App_mongo_connect()

    self.logger_object = app_logger
    self.db_client = mongo_connection
    self.records = records
def __init__(self):
    """Parse command-line options and prepare paths, log files and run time.

    Command-line options (all received as strings):
        --interval: 1h / 4h / 1d / 15m (default ``1h``).
        --input_datetime: UTC datetime formatted ``%d_%m_%Y_%H_%M_%S``; when
            empty, the current UTC time truncated to the hour is used.
        --log_level: ``default`` or ``critical`` (default ``default``).
        --local_file: name of a file inside ``Local_files`` (default ``''``).
        --records: Python expression evaluated with ``eval``
            (default ``"False"``).

    Side effects:
        Resets ``sys.argv`` to the script name only, prints the parsed
        arguments and the datetime in use, and opens ``Logs/log.txt`` and
        ``Logs/developer_log.txt`` in append mode, storing both handles on
        the instance.

    Raises:
        SystemExit: with status -1 when ``--local_file`` names a file that is
            not present in ``Local_files``.
        ValueError: when ``--input_datetime`` does not match the expected
            format.
    """
    parser = argparse.ArgumentParser()

    # Arguments
    parser.add_argument('--interval',
                        type=str,
                        default='1h',
                        help='1h / 4h / 1d / 15m, default - 1h')
    parser.add_argument('--input_datetime',
                        type=str,
                        default='',
                        help='Default - ""')
    parser.add_argument('--log_level',
                        type=str,
                        default='default',
                        help='default / critical, default - "default"')
    parser.add_argument('--local_file',
                        type=str,
                        default='',
                        help='Default - False')
    parser.add_argument('--records',
                        type=str,
                        default="False",
                        help='Default - False')
    args = vars(parser.parse_args())

    # clearing sys.argv to prevent passing them to other files
    sys.argv = [sys.argv[0]]

    print("Input Parameters are:")
    print(args)

    self.interval = args['interval']
    self.input_datetime = args['input_datetime']
    self.log_level = args['log_level']
    self.local_file = args['local_file']
    # NOTE(review): eval() runs arbitrary code taken from the command line.
    # Kept so that accepted inputs do not change; consider
    # ast.literal_eval or a plain boolean flag instead.
    self.records = eval(args['records'])
    self.data = None

    self.abs_path = os.path.dirname(os.path.abspath(__file__))

    # logs (if needed)
    log_path = os.path.join(self.abs_path, "Logs", "log.txt")
    dev_log_path = os.path.join(self.abs_path, "Logs", "developer_log.txt")

    # check arguments - exit with error if they are malformed
    if self.local_file:
        local_file_path = os.path.join(self.abs_path, "Local_files",
                                       self.local_file)
        if os.path.isfile(local_file_path):
            self.local_file_path = local_file_path
        else:
            print("Local_file does not exist!")
            print("Terminated!")
            # os.exit() does not exist and raised AttributeError instead of
            # terminating cleanly; sys.exit() is the intended call.
            sys.exit(-1)

    self.log_file = open(log_path, "a+")
    self.developer_log_file = open(dev_log_path, "a+")
    self.logger_object = logger.App_Logger()

    self.log_info = {
        'abs_path': self.abs_path,
        'log_file': self.log_file,
        'logger_object': self.logger_object,
        'developer_log_file': self.developer_log_file
    }

    # Datetime string format with minutes and seconds fixed to zero
    self.datetime_string_format = "%d_%m_%Y_%H_00_00"

    if self.input_datetime == '':
        # Current UTC time truncated to the whole hour; equivalent to the
        # former strftime/strptime round trip through the format above.
        self.input_datetime = datetime.now(timezone.utc).replace(
            minute=0, second=0, microsecond=0)
        print("Using default datetime " +
              self.input_datetime.strftime("%Y-%m-%d %H:%M:%S%z"))
    else:
        # we use only UTC time as input
        self.input_datetime = datetime.strptime(
            self.input_datetime,
            "%d_%m_%Y_%H_%M_%S").replace(tzinfo=timezone.utc)
        print("Using input datetime " +
              self.input_datetime.strftime("%Y-%m-%d %H:%M:%S%z"))