Example #1
def preProcessTweets():
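    # Read tweets from the CSV export, clean each one with the Preprocessor,
    # and collect every token that survives the stop-word filter below.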
    processedTweets = []
    with open("E:/twitter10k.csv", newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')

        for row in reader:
            try:
                #print(counter)
                #print(row)
                tweet = row[-2]
                #print(tweet)
                #print('############')
                preProcessor = preprocessor.Preprocessor()
                processedTweet = preProcessor.preprocess_text(tweet)
                #print(processedTweet)
                # Skip a handful of noisy tokens before collecting the word.
                stop_tokens = {'http', 'get', 'is', 'ny', 'lol',
                               'na', 'u', '-', 'us', 'im'}
                for word in processedTweet:
                    if word not in stop_tokens:
                        processedTweets.append(word)
                #print('done processing ############')
            except Exception:
                # Skip rows that fail to parse or preprocess.
                continue

        return processedTweets
Example #2
    def __init__(self,
                 image_file_paths,
                 mean_image=None,
                 bandstats_file_path=None,
                 original_size_x=650,
                 original_size_y=650,
                 input_size=256,
                 slice_count_x=1,
                 slice_count_y=1,
                 is_8_channel=True):
        super().__init__()
        # logger.info("Creating image list dataset from {} images".format(str(len(image_file_paths))))
        self.preprocessor = preprocessor.Preprocessor(
            datapath=None,
            original_size_x=original_size_x,
            original_size_y=original_size_y,
            input_size=input_size,
            slice_count_x=slice_count_x,
            slice_count_y=slice_count_y,
            is_8_channel=is_8_channel)
        # Point the preprocessor at the bandstats file explicitly; it will not find it on its own.
        self.preprocessor.path_mgr.bandstats_file = bandstats_file_path
        self.image_file_paths = image_file_paths
        self.slice_count = slice_count_x * slice_count_y
        self.current_image_path = ""
        self.is_8_channel = is_8_channel
        self.preloaded_slices = {}

        # TODO(martun): later implement mean subtraction as a transformation.
        self.mean_image = mean_image
Example #3
def voodooOneFile(fullName, inputPath, fileList):
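    # Generate the voodoo mock code for one file; if generation fails, fall back
    # to the Preprocessor's interceptor output and embed the error for inspection.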
    fullOutput = fullOutputName(fullName, inputPath)
    mkdirOf(fullOutput)
    output = ''
    try:
        output += voodoo.voodoo(input=fullName,
                                output=fullOutput,
                                pathToRemoveFromIdentifier=inputPath,
                                voodooDBFile=args.voodooDB,
                                includes=args.includePath,
                                defines=args.define,
                                trace=False,
                                preIncludes=args.preInclude)
        state = "V"
    except Exception as e:
        if str(e).find("all argume") != -1:
            raise
        inputLines = voodoo._readLinesOfFile(fullName)
        prepro = preprocessor.Preprocessor(fullName, fullOutput, inputLines,
                                           inputPath)
        output += prepro.intercepter()
        output += "\n/* The error that forced interception:\n" + \
                    str( e ).replace( "*/", "* /" ) + "\n"
        output += "\n"
        output += "Voodoo stack trace:\n" + traceback.format_exc()
        output += "*/\n"
        output += "\n"
        state = "I"
Example #4
def main():
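    # Preprocess each corpus (UDC, CMDC, Wiki, BDC) and build a lexicon from the combined CSVs.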
    pp = preprocessor.Preprocessor()
    csvs = []
    csvs.extend(pp.do_udc())
    csvs.extend(pp.do_cmdc())
    csvs.extend(pp.do_wiki())
    csvs.extend(pp.do_bdc())
    lexicon.Lexicon(csvs)
Example #5
def preprocessor(argv):
    mp3_path = argv[2]
    book_path = argv[3]
    print("You run preprocessor.")
    print("Path to mp3: " + mp3_path)
    print("Path to book: " + book_path)
    preprocessor = pr.Preprocessor(mp3_path, book_path, PREPROCESSOR_PATH)
    preprocessor.preprocess()
Example #6
 def set_preprocessor(self, preprocessor_):
     if preprocessor_ is None:
         preprocessor_ = [preprocessor.Preprocessor()]
     elif not isinstance(preprocessor_, list):
         preprocessor_ = [preprocessor_]
     self.preprocessors = preprocessor_
     Xtrain, ytrain = self.split(self.training_data, self.target_column)
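     # Fit each preprocessor on the training split in turn, transforming the data before the next one is fitted.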
     for pp in self.preprocessors:
         pp.fit(Xtrain)
         Xtrain = self._preprocess_one(Xtrain, pp)
Example #7
 def __init__(self, nominal_src, nominal_file, conditions_ls):
     self.src = nominal_src
     self.input_name = nominal_file
     self.nominal_model = preprocessor.Preprocessor(nominal_src +
                                                    nominal_file)
     self.nominal_model.clean_input()
     self.nominal_model.generate_species_classes()
     self.Temp_ls = [conditions_ls[0]]  # should be a list
     self.Pres_ls = [conditions_ls[1]]  # should be a list
     self.Energy_grid = conditions_ls[2]  # should be a float
     self.new_ne_file = []
Example #8
    def setUp(self):
        # initial runtime environment
        args = {"config_file": "../config_omniphotos.yaml"}
        self.preprocessor = preprocessor.Preprocessor(args)

        self.preprocessor.root_dir = \
            pathlib.Path("D:/workdata/testDatasets/circular/KyotoShrines_test")
        self.preprocessor.image_output_path = \
            pathlib.Path("D:/workdata/testDatasets/circular/KyotoShrines_test/Input")
        self.preprocessor.FPS = 50
        self.preprocessor.omniphotos_config_template_path = \
            "D:/workspace/Python/preprocessing/template/config.yaml.template"
Example #9
def main():
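    # Note: the parsed CLI arguments are overridden below by hard-coded paths
    # for the ZXing, Rhino and JodaTime datasets.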
    parser = argparse.ArgumentParser()
    parser.add_argument("--project_dir", type=str, required=True)
    parser.add_argument("--report_dir", type=str, required=True)
    parser.add_argument("--num_files_to_print",
                        type=int,
                        required=False,
                        default=20)
    args = parser.parse_args()

    project_dir = args.project_dir
    report_dir = args.report_dir
    num_files_to_print = args.num_files_to_print

    project_dir = "../data/ZXing"
    report_dir = "../data/ZXing/ZXingBugRepository.xml"

    project_report_info = preprocessor.Preprocessor(project_dir, report_dir)
    similarity_info = similarity_calculator.SimilarityCalculator(
        project_report_info.project_frequency_dict,
        project_report_info.report_frequency_dict,
        project_report_info.xml_report, num_files_to_print)

    project_dir = "../data/Rhino"
    report_dir = "../data/Rhino/RhinoBugRepository.xml"

    project_report_info = preprocessor.Preprocessor(project_dir, report_dir)
    similarity_info = similarity_calculator.SimilarityCalculator(
        project_report_info.project_frequency_dict,
        project_report_info.report_frequency_dict,
        project_report_info.xml_report, num_files_to_print)

    project_dir = "../data/JodaTime/"
    report_dir = "../data/JodaTime/JodaTimeBugRepository.xml"

    project_report_info = preprocessor.Preprocessor(project_dir, report_dir)
    similarity_info = similarity_calculator.SimilarityCalculator(
        project_report_info.project_frequency_dict,
        project_report_info.report_frequency_dict,
        project_report_info.xml_report, num_files_to_print)
Example #10
def preprocess_input(document,
                     lower=True,
                     remove_punctuation=False,
                     remove_stop_words=False):
    preprocessor = pp.Preprocessor()
    if lower:
        document = document.lower()
    if remove_punctuation:
        document = preprocessor.remove_punctuation(document)
    if remove_stop_words:
        document = preprocessor.remove_stop_words(document,
                                                  german=True,
                                                  english=True)
    return document
Example #11
    def make_dataframe(self):
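        # Load the test CSV, run it through the tokenize/tag/lemmatize pipeline,
        # and attach the model's prediction scores to the resulting dataframe.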
        test_df = pd.read_csv(self.test_path, names=['x', 'y'])

        test_data = preprocessor.Preprocessor(test_df, self.model.vocab)
        test_data.tokenize('x')
        test_data.add_tags('x')
        test_data.lemmatize('x')

        test_data.update_dataframe('x', 'y')

        test_data.data['pos_score'], test_data.data[
            'neg_score'], test_data.data['likelihood_pos'] = (
                self.model.predict(test_data.data))
        return test_data.data
Example #12
 def __init__(self,
              input_path,
              nominal_file,
              perturb_dict,
              nominal_dict,
              abstraction=False):
     self.input_path = input_path
     self.input_name = nominal_file
     self.nominal_model = preprocessor.Preprocessor(input_path +
                                                    nominal_file)
     self.nominal_model.clean_input()
     self.nominal_model.generate_species_classes(abstraction=abstraction)
     self.perturb_dict = perturb_dict
     self.nominal_dict = nominal_dict
     self.abstraction = abstraction
Example #13
def main():
    prep = preprocessor.Preprocessor()
    # BUG:
    raw_img_data = prep.read_img_jpeg_bytes("./pedestrain.jpg")
    warm_up(prep)

    server_addr = "/tmp/coin_dl_server"
    client_addr = "/tmp/coin_dl_client"
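    # Unix domain socket paths for the server and client; any stale socket files
    # from a previous run are removed before the main loop starts.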

    for addr in [client_addr, server_addr]:
        try:
            os.remove(addr)
        except OSError:
            pass

    main_loop(server_addr, client_addr, prep, raw_img_data)
Example #14
 def warm_up(self, det, mode):
     self.logger.info("Warm-up the detector")
     start = time.time()
     raw_img_data = det.read_img_jpeg_bytes("./pedestrain.jpg")
     if mode == "raw":
         # Warm up the session, first time inference is slow
         ret = det.inference(raw_img_data)
         ret = det.get_detection_results(*ret)
     elif mode == "preprocessed":
         prep = preprocessor.Preprocessor()
         compressed_img_data = prep.inference(raw_img_data, 70)
         ret = det.inference(compressed_img_data)
         ret = det.get_detection_results(*ret)
     duration = time.time() - start
     self.logger.info(
         f"Warm-up the mode {mode} finished! Takes {duration} seconds")
Example #15
def generate_files():
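    # End-to-end dataset preparation: load the dataset, preprocess synopses and
    # genres, build indexes, optionally compute embedding weights, then filter,
    # encode and save the data.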
    check_nltk_resources()
    check_paths()

    p = preprocessor.Preprocessor()
    df = p.load_dataset()

    p.preprocess_synopses(df)
    p.preprocess_genres(df)
    p.build_indexes()
    if settings.USE_W2V:
        p.generate_embedding_weights()
    p.filter_dataset()
    p.encode_genres()
    p.encode_synopses()
    p.save_data()
Example #16
def get_predictions(g, n):
    possible_genres = list(g.mlb.classes_)
    print("Possible film genres: ", ','.join(possible_genres))
    input_line = 'r'  #input("Insert a comma separated set of genres (r for random, q for quit): ")
    if input_line == 'q':
        exit()
    randomly = input_line == 'r'
    p = preprocessor.Preprocessor()
    if randomly:
        n_genres = random.randint(1, 6)
        input_genres = random.sample(possible_genres, n_genres)
    else:
        input_genres = input_line.split(',')
        for ig in input_genres:
            if ig not in possible_genres:
                print(ig + " is not a possible genre")
                get_predictions(g, n)
    print("Input genres: ", ', '.join(input_genres))
    encoded_genres = g.mlb.transform([input_genres])
    mode = input("Input g or b for greedy or beam search mode: ")
    previous_words = input("Introduce help/previous words (optional): ")
    previous_words = p.clean_text(previous_words)
    previous_words = p.tokenize(previous_words)[:-1]
    prvs = []
    for pw in previous_words:
        if pw in g.word_to_index.keys():
            prvs.append(pw)
        else:
            prvs.append(settings.UNKNOWN_TOKEN)
    # Use the unknown-token-mapped words; None when no help words were given.
    previous_words = prvs if prvs else None
    print("Starting words: " + str(previous_words))
    if mode == 'g':
        print("Greedy search mode")
        syn = get_predictions_greedy(g, n, encoded_genres, previous_words)
    elif mode == 'b':
        print("Beam search mode")
        syn = get_predictions_beam(g=g,
                                   n=n,
                                   encoded_genres=encoded_genres,
                                   previous_words=previous_words)
    else:
        print("Wrong mode")
        get_predictions(g, n)
    print("Synopsis: ", syn)
    get_predictions(g, n)
Example #17
class FramePipeline:
    cam = cm.Camera()
    preprocessor = prep.Preprocessor()
    homographyOp = homo.Homography()
    laneLinesFinder = None
    currOriginalFrame = None
    visualizer = None

    def __init__(self, frameWidth, frameHeight):
        self.frameWidth = frameWidth
        self.frameHeight = frameHeight
        self.cam.init(9, 6, 'camera_cal/calibration*.jpg')
        self.cam.calibrate()
        self.homographyOp.setFrameSize(frameWidth, frameHeight)
        self.homographyOp.estimateRoadHomography()
        self.laneLinesFinder = lf.LaneLinesFinder(frameWidth, frameHeight)
        self.visualizer = visu.Visualizer(self.laneLinesFinder, self)

    def processFrame(self, InputImg):
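        # Lane-detection pipeline: undistort, extract edges, crop, warp to a
        # top-down view, fit the lane lines, then draw the visualisation overlay.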

        self.currOriginalFrame = InputImg

        undistortedImg = self.cam.undistortImg(InputImg)

        sobelImg = self.preprocessor.extractEdges(undistortedImg, 'all')

        croppedImg = self.preprocessor.crop(sobelImg)

        rectImg = self.homographyOp.warp(croppedImg)

        warped_out = self.laneLinesFinder.findLane(rectImg)

        output = self.visualizer.visualizeFrame(rectImg)

        #only for the report at the end
        #cv2.imwrite('afterUndist.jpg', undistortedImg)
        #cv2.imwrite('afterSobel.jpg', sobelImg)
        #cv2.imwrite('afterCropping.jpg', croppedImg)
        #cv2.imwrite('afterRectifying.jpg', rectImg)
        #cv2.imwrite('afterFitting.jpg', warped_out)
        #cv2.imwrite('afterWarpingBack.jpg', output)
        #cv2.waitKey()
        return output
Example #18
    def __init__(self, methodName):
        super().__init__(methodName)

        self.preprocessor = preprocessor.Preprocessor()
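        # A tiny labelled dataset with messy text, plus the expected normalized
        # output the preprocessor should produce.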
        column_names = ['label', 'text']
        data_to_process = {
            'label': ['ham', 'ham', 'spam'],
            'text': [
                'Not normalized   #$text', '   AnOthEr not normalized TEXT',
                'Not normalized     SPAM'
            ],
        }
        self.dataset_to_process = pd.DataFrame(data_to_process,
                                               columns=column_names)

        correct_data = {
            'label': [0, 0, 1],
            'text':
            ['normalized text', 'another normalized text', 'normalized spam'],
        }
        self.correct_dataset = pd.DataFrame(correct_data, columns=column_names)
Example #19
 def test2():
     '''
     ===========================================================
     Following steps:
         1. Add numbers and punct
         2. No tokenizer
         3. Stop words
         4. pos tag
     ===========================================================
     '''
     print('running preprocessor test 2 ...')
     pattern = re.compile(r'[^а-яА-я0-9,.!?;\- ё]')
     analyzer = maru.get_analyzer(tagger='linear')
     config = preprocessor.Config(regexp=pattern,
                                  stopwords=stopwords_set,
                                  analyzer=analyzer,
                                  with_pos_tag=True,
                                  remove_stop_words=False,
                                  lemmatize=True,
                                  tokenizer=None)
     pipeline = preprocessor.Preprocessor(config)
     case = [
         'Так говорила в июле 1805 года известная',
         '— Как можно быть здоровой... когда нравственно страдаешь?',
         'праздник отменен, Je vous avoue que toutes ces fêtes'
     ]
     expected = [[
         'так_ADV', 'говорить_VERB', 'в_ADP', 'июль_NOUN', '1805_NUM',
         'год_NOUN', 'известный_ADJ'
     ],
                 [
                     'как_CONJ', 'можно_ADJ', 'быть_VERB',
                     'здоровой..._CONJ', 'когда_CONJ', 'нравственно_ADV',
                     'страдаешь?_PRON'
                 ], ['праздник_NOUN', 'отменен,_VERB']]
     res = pipeline.fit(case).transform(case)
     for res_line, expected_line in zip(res, expected):
         assert compare(res_line, expected_line), \
             'failed with {} and {}'.format(res_line, expected_line)
     print('test 2 passed')
Example #20
 def test4():
     '''
     ===========================================================
     Following steps:
         1. Add numbers and punct
         2. Razdel tokenizer
         3. Stop words
         4. No pos tag
         5. No lemmatization
     ===========================================================
     '''
     print('running preprocessor test 4 ...')
     pattern = re.compile(r'[^а-яА-я0-9,.!?;\- ё]')
     analyzer = maru.get_analyzer(tagger='linear')
     tokenizer = razdel
     config = preprocessor.Config(regexp=pattern,
                                  stopwords=stopwords_set,
                                  analyzer=analyzer,
                                  with_pos_tag=False,
                                  remove_stop_words=False,
                                  lemmatize=False,
                                  tokenizer=tokenizer)
     pipeline = preprocessor.Preprocessor(config)
     case = [
         'Так говорила в июле 1805 года известная',
         '— Как можно быть здоровой... когда нравственно страдаешь?',
         'праздник отменен, Je vous avoue que toutes ces fêtes'
     ]
     expected = [[
         'так', 'говорила', 'в', 'июле', '1805', 'года', 'известная'
     ],
                 [
                     'как', 'можно', 'быть', 'здоровой', '...', 'когда',
                     'нравственно', 'страдаешь', '?'
                 ], ['праздник', 'отменен', ',']]
     res = pipeline.fit(case).transform(case)
     for res_line, expected_line in zip(res, expected):
         assert compare(res_line, expected_line), \
             'failed with {} and {}'.format(res_line, expected_line)
     print('test 4 passed')
Example #21
def test():
    raw_img_data = Detector.read_img_jpeg_bytes("./pedestrain.jpg")
    det = Detector(mode="raw")
    ret = det.inference(raw_img_data)
    resp = det.get_detection_results(*ret)
    print("*** Inference result of raw image!")
    print(resp)
    del det
    gc.collect(1)
    gc.collect(2)

    # Test detection of preprocessed image
    prep = preprocessor.Preprocessor()
    compressed_img_data = prep.inference(raw_img_data, 70)
    print(
        f"*** Raw image size: {len(raw_img_data)}B, preprocessed image size: {len(compressed_img_data)}B"
    )
    det = Detector(mode="preprocessed")
    ret = det.inference(compressed_img_data)
    resp_prep = det.get_detection_results(*ret)
    print("*** Inference result of preprocessed image!")
    print(resp_prep)
Example #22
    def doc2vec(self, sentences):
        fname = get_tmpfile('doc2vec.model')
        edited_sentences = {}
        train_corpus = []
        count = 0
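        # Build a TaggedDocument corpus, keeping a map from tag index back to the original sentence.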
        for index, sentence in sentences.items():
            processed_sentence = preprocessor.Preprocessor(
                sentence).preprocessData()
            if not processed_sentence:
                continue
            else:
                tokens = gensim.utils.simple_preprocess(processed_sentence)
                # TaggedDocument expects a list of tags, not a bare string.
                train_corpus.append(TaggedDocument(tokens, [str(count)]))
                edited_sentences[count] = sentence
                count = count + 1

        model = Doc2Vec(train_corpus,
                        vector_size=10,
                        dbow_words=1,
                        dm=1,
                        window=2,
                        min_count=2)
        return (model, train_corpus, edited_sentences)
Example #23
 def test1():
     '''
     ===========================================================
     Full house:
         1. Leave only alphabet characters
         2. Remove stop words
         3. Lemmatize and add pos tags
     ===========================================================
     '''
     print('running preprocessor test 1 ...')
     pattern = re.compile(r'[^а-яА-я ё]')
     analyzer = maru.get_analyzer(tagger='linear')
     config = preprocessor.Config(regexp=pattern,
                                  stopwords=stopwords_set,
                                  analyzer=analyzer,
                                  with_pos_tag=True,
                                  remove_stop_words=True,
                                  lemmatize=True,
                                  tokenizer=None)
     pipeline = preprocessor.Preprocessor(config)
     case = [
         'Так говорила в июле 1805 года известная',
         '— Как можно быть здоровой... когда нравственно страдаешь?',
         'праздник отменен, Je vous avoue que toutes ces fêtes'
     ]
     expected = [
         ['говорить_VERB', 'июль_NOUN', 'год_NOUN', 'известный_ADJ'],
         ['здоровый_ADJ', 'нравственно_ADV', 'страдать_VERB'],
         ### here is the case when lemmatization fails
         ['праздник_NOUN', 'отменный_ADJ']
     ]
     res = pipeline.fit(case).transform(case)
     for res_line, expected_line in zip(res, expected):
         assert compare(res_line, expected_line), \
             'failed with {} and {}'.format(res_line, expected_line)
     print('test 1 passed')
Example #24
 def getNews(self):
     links = self.getGoogleLinks()
     print(len(links))
     news = {}
     for item in range(self.number):
         # Get the text of article
         date = int(links[item][0])
         news[date] = {}
         link = links[item][1]
         article = requests.get(link)
         soup = BeautifulSoup(article.text, "html.parser")
         for script in soup(["script", "style", "meta", "noscript"]):
             script.extract()  # rip it out
         text = soup.get_text()
         # Get the source
         source_1 = re.search('\.\\s*([^.]*)', link).group(1)
         source_2 = re.search('//\\s*([^.]*)', link).group(1)
         if "/" in source_1:
             source = source_2
         else:
             source = source_1
         news[date]['source'] = source
         news[date]['text'] = preprocessor.Preprocessor(text).preprocessData()
     return news
Example #25
    try:
        output += voodoo.voodooExpectSource(
            input=fullName,
            output=fullOutput,
            pathToRemoveFromIdentifier=inputPath,
            voodooDBFile=args.voodooDB,
            includes=args.includePath,
            defines=args.define,
            trace=False,
            preIncludes=args.preInclude)
        state = "V"
    except Exception as e:
        if str(e).find("all argume") != -1:
            raise
        inputLines = voodoo._readLinesOfFile(fullName)
        prepro = preprocessor.Preprocessor(fullName, fullOutput, inputLines,
                                           inputPath)
        output += prepro.intercepter()
        output += "\n/* The error that forced interception:\n" + \
                    str( e ).replace( "*/", "* /" ) + "\n"
        output += "\n"
        output += "Voodoo stack trace:\n" + traceback.format_exc()
        output += "*/\n"
        output += "\n"
        state = "I"
    # file() is Python 2 only; write the output with open() in a context manager.
    with open(fullOutput, "w") as f:
        f.write(output)

    sys.stdout.write("  <%d/%d> %s  %s\n" % (1 + fileList.index(
        (fullName, inputPath)), len(fileList), state, fullOutput))
Example #26
import csv
import matplotlib.pyplot as plt
import pandas as pd

from keras.layers import Dense
from keras.optimizers import Adam
from keras.models import Sequential
from keras.callbacks import EarlyStopping

import preprocessor
import featureanalysis

# Above this value, the survival flag will be true
PROBABILITY_MARGIN_SURVIVAL = 0.5

prepr = preprocessor.Preprocessor()
prepr.process_training_dataset('train.csv')

df = pd.read_csv('train.csv')

# perform feature analysis
numerical_features = ["Survived", "SibSp", "Parch", "Age", "Fare"]
feat_analysis = featureanalysis.FeatureAnalysis(df)
feat_analysis.get_correlation_numerical_values(numerical_features)

# removed cabin and name columns
input_value, output = prepr.get_train_datasets()

# Get number of columns in training data
n_cols = input_value.shape[1]
Example #27
import sys

import preprocessor

CMD_OVERWRITE_OPTION = '-ow'

if __name__ == "__main__":
    # Check arguments
    if len(sys.argv) >= 3:
        # At least 2 arguments have been passed
        inp = sys.argv[1]
        out = sys.argv[2]
        # Define overwrite option
        overwrite = len(sys.argv) >= 4 and sys.argv[3] == CMD_OVERWRITE_OPTION
        p = preprocessor.Preprocessor(preprocessor.Language.vietnamese)
        try:
            p.preprocess_files(inp, out, {'overwrite': overwrite})
        except (FileNotFoundError, FileExistsError) as errors:
            for e in errors.args:
                if e:
                    print(e)
    else:
        print('Missing arguments. Arguments: input output [-ow]')
Example #28
from extractor import Extractor
import preprocessor
import dictionary
import vectorizer
import dataset_divider
import classifier
import time

start = time.time()
PreProcessor = preprocessor.Preprocessor()
Dictionary = dictionary.Dictionary()
categories = [1, 2, 3]  # Categories to be included
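# Extract each category's posts from the Flashback JSON dumps and split them into training/testing files.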

lines = []
for category in categories:
    lines.append(
        Extractor.extract(('flashback' + str(category) + '.json'),
                          ('extracted' + str(category) + '.txt')))
    dataset_divider.Divider.divide(('extracted' + str(category) + '.txt'),
                                   lines[len(lines) - 1])

# pre-processing of training data
processed = []
processed_test = []
for category in categories:
    processed.append(
        PreProcessor.preprocess('training' + str(category) + ".txt"))
    processed_test.append(
        PreProcessor.preprocess('testing' + str(category) + ".txt"))

with open("testingposts.txt", "w") as file:
Example #29
import argparse

import preprocessor
from definitions import TEST_PROCESSED_PATH, TRAIN_PROCESSED_PATH


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--train', action='store_true')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    preprocess = preprocessor.Preprocessor(train=args.train, dl=False)
    preprocess_data = preprocess.clean_data()
    path = TRAIN_PROCESSED_PATH if args.train else TEST_PROCESSED_PATH
    preprocess_data.to_csv(path, encoding='utf-8', index=False)
Example #30
def main():
    """Main method for controlling the flow of the stylometric analyser.

    Creates objects for word, character, punctuation and word-length analysis
    to determine the patterns of style in different works.

    """

    #Column names
    colnames = ['work', 'char_freq', 'punc_freq', 'stop_freq', 'word_len_freq']
    #Initializing an empty dataframe to store all stats after analysis
    all_text_stats = pd.DataFrame(columns=colnames)

    #Try block
    try:
        #-----------------------------Analysis----------------------------------
        #Main loop for doing the analysis file by file
        for work in works:
            #calling read_input function to read the content of each file
            content = read_input(work)

            #Creating object for preprocessor class
            pre_processor = prpscr.Preprocessor()
            pre_processor.tokenise(content)
            #Fetching the tokens
            tokens = pre_processor.get_tokenised_list()

            #Creating object for CharacterAnalyser class
            char_analyser = char.CharacterAnalyser()
            #Analysing at character level
            char_analyser.analyse_characters(tokens)
            #Fetching the character occurences
            ch_occ = char_analyser.char_occ
            #Fetching the punctuation occurences
            punc_occ = char_analyser.get_punctuation_frequency()

            #Creating object for WordAnalyser class
            word_analyser = word.WordAnalyser()
            #Analysing at word level
            word_analyser.analyse_words(tokens)
            #Fetching the stop word occurences
            stop_occ = word_analyser.get_stopword_frequency()
            #Fetching the word length occurences
            word_len_occ = word_analyser.get_word_length_frequency()

            #Temporary df to store all the analysis for one text at a time
            temp_df = pd.DataFrame(
                [[work, ch_occ, punc_occ, stop_occ, word_len_occ]],
                columns=colnames)

            # DataFrame.append was removed in pandas 2.x; use pd.concat instead.
            all_text_stats = pd.concat([all_text_stats, temp_df],
                                       ignore_index=True)

        #-----------------------------Visualisation-----------------------------
        #Creating object for Visualiser class
        visualiser = vis.AnalysisVisualiser(all_text_stats)
        #Visualising punctuation frequencies in all the works
        visualiser.visualise_punctuation_frequency()
        #Visualising character frequencies in all the works
        visualiser.visualise_character_frequency()
        #Visualising stopword frequencies in all the works
        visualiser.visualise_stopword_frequency()
        #Visualising word length frequencies in all the works
        visualiser.visualise_word_length_frequency()

    #Catch for exceptions
    except ImportError as err:
        print(
            'IMPORT ERROR :', err,
            '. Please check the working directory, name or ' +
            'make sure that module is imported!')
    except TypeError as err:
        print('TYPE ERROR :', err)
    except IndexError as err:
        print('INDEX ERROR :', err)
    except ValueError as err:
        print('VALUE ERROR :', err)
    except IOError as err:
        print('INPUT ERROR :', err, '. Please check the path of the file!')
    except requests.RequestException as err:
        print('REQUEST ERROR :', err)
    except Exception as err:
        print('UNEXPECTED ERROR :', err)