예제 #1
0
def index():
    """Serve the search form and the movie results for a requested year.

    On GET the search form (``index.html``) is rendered. On POST the year is
    read from the ``content`` form field with all whitespace removed, and
    ``results.html`` is rendered with the movies for that year:

    * if the MongoDB collection ``movies_<year>`` already holds documents,
      those documents are shown;
    * otherwise ``scrapper.scrape`` is called, its result is written to
      ``./csv/movies_<year>.csv``, inserted into the collection, and shown.

    Returns:
        The rendered template, or the plain string ``'something is wrong'``
        when an exception is raised on the database/scrape path.
    """
    log_writer = logger.App_Logger()
    log_path = "logs/imdb_scraper-{}.txt".format(datetime.now().date())

    # The context manager closes the log file on every return path; the
    # original opened a new handle per request and never closed it.
    with open(log_path, 'a+') as file_object:
        if request.method != 'POST':
            return render_template('index.html')

        log_writer.log(file_object, 'Getting the year')
        year = request.form['content']
        year = "".join(year.split())  # drop every whitespace character
        log_writer.log(file_object, 'received the year {}'.format(year))

        dbConn = None
        try:
            log_writer.log(file_object, 'connecting to mongo server')
            dbConn = MongoClient("mongodb://localhost:27017/")  # opening a connection to Mongo
            log_writer.log(file_object, 'connecting to db')
            db = dbConn['imdb_scrapper']  # the database named 'imdb_scrapper'
            log_writer.log(file_object, 'creating/retrieving collection {}'.format(year))
            collection_name = 'movies_{}'.format(year)
            collection = db[collection_name]

            # Materialise the cursor instead of calling Cursor.count(), which
            # was removed in PyMongo 4. A list is also safe to hand to the
            # template once the client has been closed.
            movies = list(collection.find({}))
            if movies:
                log_writer.log(file_object, 'showing results from db')
                return render_template('results.html', movies=movies)

            log_writer.log(file_object, 'callign scrape function')
            movies = scrapper.scrape(year, log_writer, file_object)
            filename = 'movies_{}.csv'.format(year)

            log_writer.log(file_object, 'creating dataframe and writing to CSV file')
            # Built outside the CSV try-block: the frame is needed for the
            # insert below even when writing the CSV file fails.
            df = pd.DataFrame(movies)
            try:
                df.to_csv('./csv/{}'.format(filename))
            except Exception as e:
                log_writer.log(file_object, "Exception occurred while creating csv file: {}".format(e))

            # Best effort: move any CSV files found in the working directory
            # into the 'csv' folder.
            try:
                for f in os.listdir():
                    if f.endswith('.csv'):
                        shutil.move(f, 'csv')
            except Exception as e:
                log_writer.log(file_object, "Exception occurred while moving csv file: {}".format(e))

            log_writer.log(file_object, 'inserting into collection {}'.format(year))
            collection.insert_many(df.to_dict('records'))

            # NOTE(review): the last scraped entry is not displayed although
            # it is stored above - kept as in the original; confirm intent.
            return render_template('results.html', movies=movies[0:(len(movies) - 1)])

        except Exception as e:
            log_writer.log(file_object, "Exception occurred : {}".format(e))
            return 'something is wrong'
        finally:
            # Release the per-request client and its connection pool.
            if dbConn is not None:
                dbConn.close()
예제 #2
0
            x_pca = pca.transform(data)
            # Make Dataframe with pca data
            columns = ['PC' + str(i) for i in range(1, Exp_var + 1)]
            pca_data = pd.DataFrame(data=x_pca, columns=columns)

            self.logger_object.log(
                self.file_object,
                "C:: the PCA method of the DataPreprocessor class")
            return pca_data

        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'E : Exception occured in PCA method of the Data Preprocessor class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'E : Unsuccessful. Exited the PCA method of the DataPreprocessor class'
            )

            raise Exception()


if __name__ == '__main__':
    # Manual test harness: loads the training data and opens a log file so
    # that the methods defined in this module can be exercised by hand.
    log = logger.App_Logger()
    df = pd.read_csv('train.csv')
    # The context manager closes the log file when the harness finishes;
    # the original left the handle open.
    with open('log.txt', "a") as file:
        print(df.dtypes)
        # Test your code by calling methods here.
예제 #3
0
 def __init__(self, records):
     """Keep ``records`` and create the logger and Mongo connection helpers.

     Args:
         records: Stored unchanged on ``self.records``.
     """
     app_logger = logger.App_Logger()
     self.logger_object = app_logger
     mongo_handle = MongoConnection.App_mongo_connect()
     self.db_client = mongo_handle
     self.records = records
예제 #4
0
    def __init__(self):
        """Parse command-line options and prepare paths, logs and the run datetime.

        Reads ``--interval``, ``--input_datetime``, ``--log_level``,
        ``--local_file`` and ``--records`` from ``sys.argv`` and then resets
        ``sys.argv`` to the program name only. Opens ``Logs/log.txt`` and
        ``Logs/developer_log.txt`` (relative to this file) in append mode and
        stores them, together with a logger object, in ``self.log_info``.
        ``self.input_datetime`` ends up as a UTC-aware ``datetime``: the
        current hour when no value was given, otherwise the parsed argument.

        Raises:
            SystemExit: ``--local_file`` names a file that does not exist in
                the ``Local_files`` directory.
            ValueError: ``--records`` is not a Python literal, or
                ``--input_datetime`` does not match ``%d_%m_%Y_%H_%M_%S``.
            SyntaxError: ``--records`` is not syntactically valid Python.
        """
        # Local import: only needed here to parse --records safely.
        import ast

        parser = argparse.ArgumentParser()

        # Arguments
        parser.add_argument('--interval',
                            type=str,
                            default='1h',
                            help='1h / 4h / 1d / 15m, default - 1h')
        parser.add_argument('--input_datetime',
                            type=str,
                            default='',
                            help='Default - ""')
        parser.add_argument('--log_level',
                            type=str,
                            default='default',
                            help='default / critical, default - "default"')
        parser.add_argument('--local_file',
                            type=str,
                            default='',
                            help='Default - False')
        parser.add_argument('--records',
                            type=str,
                            default="False",
                            help='Default - False')
        args = vars(parser.parse_args())
        # clearing sys.argv to prevent passing them to other files
        sys.argv = [sys.argv[0]]

        print("Input Parameters are:")
        print(args)

        self.interval = args['interval']
        self.input_datetime = args['input_datetime']
        self.log_level = args['log_level']
        self.local_file = args['local_file']
        # SECURITY: this used to be eval(), which executes arbitrary code
        # supplied on the command line. literal_eval accepts the same literal
        # values ("True", "False", numbers, ...) without executing anything.
        self.records = ast.literal_eval(args['records'])
        self.data = None

        # check arguments - exit with error if they are malformed

        self.abs_path = os.path.dirname(os.path.abspath(__file__))

        # logs (if needed)
        log_path = os.path.join(self.abs_path, "Logs", "log.txt")
        dev_log_path = os.path.join(self.abs_path, "Logs", "developer_log.txt")
        if self.local_file:
            candidate_path = os.path.join(self.abs_path, "Local_files",
                                          self.local_file)
            if not os.path.isfile(candidate_path):
                print("Local_file does not exist!")
                print("Terminated!")
                # os has no exit(); the original call raised AttributeError
                # instead of terminating with the intended status code.
                sys.exit(-1)
            self.local_file_path = candidate_path

        self.log_file = open(log_path, "a+")
        self.developer_log_file = open(dev_log_path, "a+")
        self.logger_object = logger.App_Logger()

        self.log_info = {
            'abs_path': self.abs_path,
            'log_file': self.log_file,
            'logger_object': self.logger_object,
            'developer_log_file': self.developer_log_file
        }

        # Converting datetime to string format

        self.datetime_string_format = "%d_%m_%Y_%H_00_00"

        if self.input_datetime == '':
            # Current UTC time truncated to the start of the hour - the same
            # value the format-then-reparse round trip used to produce.
            self.input_datetime = datetime.now(timezone.utc).replace(
                minute=0, second=0, microsecond=0)
            print("Using default datetime " +
                  self.input_datetime.strftime("%Y-%m-%d %H:%M:%S%z"))
        else:
            # we use only UTC time as input
            self.input_datetime = datetime.strptime(
                self.input_datetime,
                "%d_%m_%Y_%H_%M_%S").replace(tzinfo=timezone.utc)
            print("Using input datetime " +
                  self.input_datetime.strftime("%Y-%m-%d %H:%M:%S%z"))