def Validate_Birthday(self, Given_Birthday, Given_Age):
     output = False
     error = ""
     clean = Cleaner()
     result = clean.Clean_Birthday(Given_Birthday)
     #Checks to see if the birthday was cleaned
     if result[0] is not None:
         if (result[1] == ""):
             date_Details = result[0].split("-")
             str_Day = date_Details[0]
             str_Month = date_Details[1]
             str_Year = date_Details[2]
             try:
                 given_Birth_Date = datetime.date(int(str_Year),
                                                  int(str_Month),
                                                  int(str_Day))
                 today = datetime.datetime.now()
                 should_Be_Age = today.year - given_Birth_Date.year - (
                     (today.month, today.day) <
                     (given_Birth_Date.month, given_Birth_Date.day))
                 if should_Be_Age == Given_Age:
                     output = True
                 else:
                     error = "The age given and birthday do not line up"
             except:
                 error = "Birthday is not a valid date"
         else:
             error = result[1]
     else:
         error = "Birthday wasnt not in a logical format"
     return output, error
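A minimal standalone sketch of the age arithmetic used above, assuming only the standard datetime module and no Cleaner dependency; the boolean comparison subtracts one year whenever the birthday has not yet occurred in the current year.

import datetime

def age_on(today, birth_date):
    # (today.month, today.day) < (birth.month, birth.day) evaluates to 1 before
    # the birthday and to 0 on or after it, so the subtraction gives the exact age.
    return today.year - birth_date.year - (
        (today.month, today.day) < (birth_date.month, birth_date.day))

assert age_on(datetime.date(2025, 3, 14), datetime.date(2000, 3, 15)) == 24
assert age_on(datetime.date(2025, 3, 15), datetime.date(2000, 3, 15)) == 25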
Example #2
 def mkfs(self, args):
     str = ""
     if len(args) > 2:
         str += "Usage: mkfs [-reuse]"
         return str
     brandnew = True
     if len(args) > 1:
         if args[1] == "-reuse":
             brandnew = False
         else:
             str += "Usage: mkfs [-reuse]"
             return str
     segmentmonitor = Cleaner.SegmentMonitor()
     Disk.disk = DiskClass(brandnew=brandnew)
     Segment.segmentmanager = SegmentManagerClass(segmentmonitor)
     InodeMap.inodemap = InodeMapClass()
     LFS.filesystem = LFSClass(initdisk=brandnew)
     if CLEANERFLAG:
         Cleaner.cleaner = Cleaner.CleanerClass(segmentmonitor)
         Cleaner.cleaner.start()  #Starting the cleaner thread
     if brandnew:
         Inode.inodeidpool = 1  #Resetting this to 1 because mkfs might be run multiple times
         rootinode = Inode(isdirectory=True)  #We make the root inode here
     else:
         LFS.filesystem.restore()
     return "1"
Example #3
def main():
    description = "Cleans up old backups to leave more room on the backup server." \
                  "\n\nE.g. python cleaner.py -p /path/to/archive -o 3:4 7:7." \
                  "\n\nThe example provided will keep an archive from every 4th day if it's more than 3 days old" \
                  " and archive every 7 days if it's more than a week old." \
                  "\n\nThe format of backups this script takes is BACKUP_SET-VERSION."
    parser = argparse.ArgumentParser(
        description=description, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('-p',
                        '--root-path',
                        type=str,
                        required=True,
                        help='The root path of your backups.')
    parser.add_argument(
        '-o',
        '--options',
        type=str,
        required=True,
        nargs='*',
        help='Your age threshold and desired interval size separated by a colon'
    )
    parser.add_argument('-f',
                        '--force',
                        action='store_true',
                        help='Automatically confirms that you want to delete.')
    args = parser.parse_args()

    calc = Calculator(args.root_path, args.options, args.force)
    calc.calculate()

    cleaner = Cleaner(calc)
    cleaner.clean()
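The -o/--options values use an age:interval format (e.g. 3:4 7:7). The real Calculator class is not shown in this snippet, so the helper below is only a hypothetical sketch of how such strings could be split into (age, interval) pairs.

def parse_options(options):
    # Each entry looks like "3:4": once a backup is more than 3 days old,
    # keep only one archive from every 4th day.
    rules = []
    for opt in options:
        age, interval = opt.split(':')
        rules.append((int(age), int(interval)))
    return sorted(rules)  # apply the rule with the smallest age threshold first

print(parse_options(['3:4', '7:7']))  # [(3, 4), (7, 7)]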
Example #4
def run():
    #Set home directory
    homedir = os.path.dirname(os.path.realpath(__file__))
    #Get full path to reference genome file (must be in files folder)
    #referencefilepath = Functions.parent_dir(homedir) + '/files/hg19_whole_genome.fa'
    #Get full path to bidirectional hits file (must be in files folder)
    bidirectionalfilepath = Functions.parent_dir(homedir) + '/files/bidirectional_hits.merge.bed'
    #Get full path to motif database for tomtom (must be in files folder)
    tomtomdir = Functions.parent_dir(homedir) + '/files/HOCOMOCOv9_AD_MEME.txt'
    if boolean == True:
        print "Cleaning directory..."
        #Deletes all files and folders in given directory/TF/peak_files
        cl.run(directory)
    print "running main\npreparing files for MEME..."
    #Bedtools intersect on all *.bed* files , then bedtools merge to ensure non-overlapping intervals
    rc.run(directory)
    #Converts ConsolidatedPeak.merge.bed to ConsolidatedPeak.merge.fasta
    b2f.run(directory, referencefilepath)
    print "done\nrunning MEME..."
    #Runs MEME, FIMO, and TOMTOM on all ConsolidatedPeak.merge.fasta
    meme.run(directory, 10000000, 10000000, tomtomdir)
    print "done\nfixing FIMO files..."
    #Removes duplicates, orders, and eliminates first column of FIMO output files
    ff.run(directory)
    print "done\ngetting motif distances to i..."
    #Calculates motif distance to bidir center for each motif of each TF
    dist.run(directory, bidirectionalfilepath, homedir)
    print "done\ngenerating overlap numbers..."
    #Determines site overlap between bidir, ChIP, and FIMO sites
    so.run(directory, bidirectionalfilepath, homedir)
    print "done"
    
Example #5
 def __init__(self):
     Cleaner.main()
     super(MainWindow, self).__init__()
     self.setGeometry(50, 50, 1200, 800)
     self.setWindowTitle("ReForm IT")
     session_id = QLabel(self)
     session_id.setText("Session_key:" + str(MainWindow.sk))
     session_id.move(800, 0)
     session_id.resize(300, 50)
     self.startWindow1()
Example #6
 def initUI(self):
     newfont = QFont("Times", 22, QFont.Bold)
     self.finalLabel = QLabel(self)
     self.finalLabel.setText(
         "Your data will soon be updated to the client database. Please don't close the application."
     )
     self.finalLabel.setFont(newfont)
     self.finalLabel.move(200, 300)
     self.finalLabel.resize(1000, 70)
     Uploader.main(MainWindow.form_id, MainWindow.sk)
     Cleaner.main()
     self.finalLabel.setText("Data Uploaded! You may now exit!")
 def Validate_Age(self, Given_Age):
     clean = Cleaner()
     result = clean.Clean_Age(Given_Age)
     error = ""
     output = False
     #Checks to see if the Cleaner could clean the Given_Age
     if result[0] is not None:
         current_Age = result[0]
         #Checks to see if current_Age is within the 0-99 range
         if 0 <= current_Age <= 99:
             output = True
         else:
             error = "Age not between 0 and 99"
     else:
         error = result[1]
     return output, error
Example #8
 def add_section(self, cleaner_id, name):
     """Add a section (cleaners)"""
     self.cleaner_ids.append(cleaner_id)
     self.cleaners[cleaner_id] = Cleaner.Cleaner()
     self.cleaners[cleaner_id].id = cleaner_id
     self.cleaners[cleaner_id].name = name
     self.cleaners[cleaner_id].description = _('Imported from winapp2.ini')
Example #9
 def run(self):
     """
     Start processing.
     """
     # parse the command line arguments and set logging options
     try:
         self.args = self.parser.parse_args()
         self.configureLogging()
         self.logger.info("Started with {0}".format(' '.join(sys.argv[1:])))
     except Exception as e:
         self.parser.print_help()
         sys.exit(e)
     # load the configuration file
     try:
         with open(self.args.config) as f:
             self.config.readfp(f)
     except Exception as e:
         self.logger.critical("Could not load the specified configuration file")
         sys.exit(e)
     # set options
     Cfg.LOG_EXC_INFO = self.args.trace
     # execute commands
     with Timer.Timer() as t:
         if self.args.crawl:
             import Crawler
             Crawler.crawl(self.config, self.args.update)
         if self.args.clean:
             import Cleaner
             Cleaner.clean(self.config, self.args.update)
         if self.args.infer:
             import Facter
             Facter.infer(self.config, self.args.update)
         if self.args.graph:
             import Grapher
             Grapher.graph(self.config, self.args.update)
         if self.args.transform:
             import Transformer
             Transformer.transform(self.config)
         if self.args.post:
             import Poster
             Poster.post(self.config)
         if self.args.analyze:
             import Analyzer
             Analyzer.analyze(self.config, self.args.update)
     self.logger.info("Indexer finished in {0}:{1}:{2}".format(t.hours, t.minutes, t.seconds))
Example #10
def create_training_data():
    print("Loading articles... This may take a while")
    t_start = time.time()
    articles = []
    for root, dirnames, filenames in os.walk('./Articles'):
        for filename in fnmatch.filter(filenames, '*.txt'):
            articles.append(os.path.join(root, filename))
    print("Loading articles complete. Took {0} seconds...".format(time.time() - t_start))



    # Questions

    # Q1
    in_random_articles = input("Use random articles? [y/N]")
    if in_random_articles == "y":
        random.shuffle(articles)
        in_random_articles = True

    # Q2
    in_clean_file = input("Clean articles [Y/n]")
    if in_clean_file == "n":
        in_clean_file = False
    else:
        in_clean_file = True

    # Q3
    in_num_articles = input("Number or articles? [Default: 10]")
    try:
        num_articles = int(in_num_articles)
    except:
        num_articles = 10

    selected_articles = articles[0:min(len(articles), num_articles)]

    try:
        os.mkdir("./Training")
    except:
        pass

    training_filename = "Training-{0}-{1}-{2}-{3}.txt".format( \
        "Clean" if in_clean_file == True else "Dirty", \
        "Shuffle" if in_random_articles else "Iterate", \
        num_articles, \
        str(uuid.uuid4())[:8])
    for article in selected_articles:
        with codecs.open("./Training/" + training_filename, "a+", encoding="utf8") as file:
            with codecs.open(article,'r', encoding="utf8") as f:
                content = f.read()
                if in_clean_file == True:
                    content = Cleaner.clean(content)

                file.write(content)
    print("Created Training set named: {0}".format(training_filename))
Example #11
def gen_small_output(title, location, company, date, thread):
    """Format the output dictionary .

    Args:
    ----
        json_dct: dict
        title: Selenium WebElement
        location: Selenium WebElement
        company: Selenium WebElement
        date: Selenium WebElement
        thread: RequestThreadInfo object

    Return:
    ------
        json_dct: dct
    """

    thread.join()
    new_json = {}
    new_json['nom_du_poste'] = title.text
    new_json['entreprise'] = company.text
    new_json['date_publication'] = date
    try:
        lieu = Cleaner.arrondissement_paris(location.text, thread.posting_txt)
        new_json['lieu'] = lieu
    except:
        pass

    try:
        salaire, contrat = Cleaner.parser(thread.posting_txt)
        new_json['salaire'] = salaire
        new_json['type_de_contrat'] = contrat
    except:
        pass

    try:
        new_json['tags'] = Cleaner.tags(thread.posting_txt)
    except:
        pass

    return new_json
Example #12
    def cb_wipe_free_space(self, action):
        """callback to wipe free space in arbitrary folder"""
        path = GuiBasic.browse_folder(self.window, _("Choose a folder"), multiple=False, stock_button=gtk.STOCK_OK)
        if not path:
            # user cancelled
            return

        backends["_gui"] = Cleaner.create_wipe_cleaner(path)

        # execute
        operations = {"_gui": ["free_disk_space"]}
        self.preview_or_run_operations(True, operations)
Example #13
def main(id, key):
    id = int(id)
    # use creds to create a client to interact with the Google Drive API
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive'
    ]
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        'client_secret.json', scope)
    client = gspread.authorize(creds)
    txt = list(np.load("results.npy"))
    a, b = [], []
    for t in txt:
        for q in t:
            a.append(q)
        b.append(a)
        a = []
    # Find a workbook by name and open the first sheet
    # Make sure you use the right name here.
    if id == 0:
        sheet = client.open("GenericForm").sheet1
    elif (id == 1):

        sheet = client.open("MedicalForm").sheet1

    #for k in b :
    #print(type(b), b)
    #    sheet.insert_row(k, 2)
    sheet.insert_row([
        key,
    ], 2)

    #print(r)
    # Extract and print all of the values
    #list_of_hashes = sheet.get_all_records()
    #print(list_of_hashes)

    #row = ["I'm","inserting","a","row","into","a,","Spreadsheet","with","=IMAGE('FromPhone\\img2.jpg')"]
    #sheet.insert_row(row, 1)
    Cleaner.main()
Example #14
def preprocessing(StemmedDict, fileName):
    v = set()
    f = open(fileName, 'r')
    for line in f:
        line = line.strip()
        #print line
        lineCleaned = Cleaner.getProcessedData(line, 1)
        #print lineCleaned
        Id = lineCleaned.split('\x01')[0]
        lineStem = StemmerClass.Stemmer()
        if Id not in v:
            v.add(Id)
            StemmedDict[Id] = lineStem.getStemmedCorpus(lineCleaned)
Example #15
    def shred_paths(self, paths):
        """Shred file or folders"""
        # create a temporary cleaner object
        backends['_gui'] = Cleaner.create_simple_cleaner(paths)

        # preview and confirm
        operations = {'_gui': ['files']}
        self.preview_or_run_operations(False, operations)

        if GuiBasic.delete_confirmation_dialog(self.window, mention_preview=False):
            # delete
            self.preview_or_run_operations(True, operations)
            return True
        return False
Example #16
    def cb_wipe_free_space(self, action):
        """callback to wipe free space in arbitrary folder"""
        path = GuiBasic.browse_folder(self.window,
                                      _("Choose a folder"),
                                      multiple=False, stock_button=gtk.STOCK_OK)
        if not path:
            # user cancelled
            return

        backends['_gui'] = Cleaner.create_wipe_cleaner(path)

        # execute
        operations = {'_gui': ['free_disk_space']}
        self.preview_or_run_operations(True, operations)
Example #17
def createFeature(fileName, docRumourScore, docFactScore, classLabel):
    f = open(fileName, 'r')
    rmax = max(docRumourScore.itervalues())
    rmin = min(docRumourScore.itervalues())
    lmax = max(docFactScore.itervalues())
    lmin = min(docFactScore.itervalues())
    for line in f:
        line = line.strip()
        #print line
        lineCleaned = Cleaner.getProcessedData(line, 1)
        data = lineCleaned.split('\x01')
        id = data[0]
        rumorScore = docRumourScore[id]
        factScore = docFactScore[id]
        liscence = 0 if data[3] == 'false' else 1
        defination = 0 if data[4] == 'sd' else 1
        views = float(data[5])
        print id + ',' + str((rumorScore - rmin) / rmax) + ',' + str((factScore - lmin) / lmax) + ',' + str(liscence) + ',' + str(defination) + ',' + str(float(data[6]) / views) + ',' + str(float(data[7]) / views) + ',' + str(float(data[8]) / views) + ',' + str(float(data[9]) / views) + ',' + str(classLabel)
Example #18
def scrape_job_page(driver, job_title, job_location):
    """Scrape a page of jobs from Monster.

    Grab everything that is possible (or relevant) for each of the jobs posted
    for a given page. This will typically include the job title, job location,
    posting company, the date posted, and the posting text.

    Args:
    ----
        driver: Selenium webdriver
        job_title: str
        job_location: str
    """
    global job_list
    titles, locations, companies, dates, hrefs = query_for_data(driver)

    current_date = str(datetime.datetime.now(pytz.timezone('Europe/Paris')))
    json_dct = {'search_title': job_title, \
                'search_location': job_location, \
                'search_date': current_date, 'job_site': 'monster'}

    thread_lst = []
    for href in hrefs:
        try:
            thread = HrefQueryThread(href.get_attribute('href'))
        except:
            print('Exception in href thread builder')
            thread = HrefQueryThread('')
        thread_lst.append(thread)
        thread.start()
    for title, location, company, date, thread in \
            zip(titles, locations, companies, dates, thread_lst):
        date_txt = Cleaner.date_exacte(date.text)

        try:
            small_dict = gen_small_output(title, location, company, date_txt,
                                          thread)
        except:
            print('Missed element in Monster!')

        try:
            job_list.append(small_dict)
        except IOError as err:
            print(err)
Example #19
    def __init__(self, pathname, xlate_cb=None):
        """Create cleaner from XML in pathname.

        If xlate_cb is set, use it as a callback for each
        translate-able string.
        """

        self.action = None
        self.cleaner = Cleaner.Cleaner()
        self.option_id = None
        self.option_name = None
        self.option_description = None
        self.option_warning = None
        self.xlate_cb = xlate_cb
        if self.xlate_cb is None:
            self.xlate_cb = lambda x, y=None: None  # do nothing

        dom = xml.dom.minidom.parse(pathname)

        self.handle_cleaner(dom.getElementsByTagName('cleaner')[0])
Example #20
import CaptchaGetter as CG
import CropImages as CI
import Cleaner as CL
import os

img_extension = 'png'
captcha_getter_obj = CG.CaptchaGetter(100, img_extension, '../')
path_to_captcha_imgs = captcha_getter_obj.get_dump_path()
captcha_getter_obj.dump_images()
img_cropper = CI.CropImages(path_to_captcha_imgs, img_extension, '../')
img_cropper.crop_and_save_images()
cropped_images_path = img_cropper.get_cropped_images_path()
img_cleaner = CL.Cleaner(cropped_images_path, img_extension, '../')
img_cleaner.clean_images()


# ================ CREATES LABELLED_DATA FOLDER ================ 
labelled_images_path_root = os.path.abspath('../labelled_data')

if not os.path.isdir(labelled_images_path_root):
    os.mkdir(labelled_images_path_root)

for i in range(10):
    if not os.path.isdir(os.path.join(labelled_images_path_root, str(i))):
        os.mkdir(os.path.join(labelled_images_path_root, str(i)))

# After this, manual labelling is needed: sort the files into their digit folder under the 'labelled_data' folder.
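A hedged sketch of how the manually labelled digit folders created above could later be read back into (image path, label) pairs; this helper is not part of the pipeline above and only illustrates the expected folder layout.

import os

def load_labelled(labelled_root='../labelled_data', ext='png'):
    # Walk the ten digit folders (0-9) and pair each image path with its digit label.
    samples = []
    for digit in range(10):
        folder = os.path.join(labelled_root, str(digit))
        for name in sorted(os.listdir(folder)):
            if name.endswith('.' + ext):
                samples.append((os.path.join(folder, name), digit))
    return samples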
Example #21
 def __init__(self):
     Daemon.__init__(self, pidfile='/tmp/floucapt.pid', stdout='/tmp/floucapt.log', stderr='/tmp/floucapt.error')
     self.quit    = False
     self.logger  = Logger()
     self.cleaner = Cleaner()
Example #22
class DaemonImpl(Daemon):
    """
    This class is a concrete implementation of the abstract daemon.
    """
    def __init__(self):
        Daemon.__init__(self, pidfile='/tmp/floucapt.pid', stdout='/tmp/floucapt.log', stderr='/tmp/floucapt.error')
        self.quit    = False
        self.logger  = Logger()
        self.cleaner = Cleaner()


    """

    """
    def signal_handler(self, signal, frame):
        self.quit = True

    """
    This method save the times for the next pause of application
    This variable will be used in the method time_diff()
    """
    def time_start(self):
        self.startTime = time.time()

    """
    This method calculate the remaining time before the next contact camera
    Then we put the application pauses

    With this, the real frequency of contact with the camera is exactly the same as defined in the config file
    """
    def time_diff(self):
        endTime = time.time()
        elapsed = endTime - self.startTime
        pause = self.freqPictures - elapsed

        if pause > 0:
            time.sleep(pause)


    """


    """
    def run(self):
        # Redirection of signal to close properly application
        signal.signal(signal.SIGINT , self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

        # Load the configuration from the config file
        self.freqPictures, link, floucaptFolder = loadConfig(self.logger)


        # Main loop
        # Handles contact with the camera,
        # processing and recording the image
        # the call to cleaner
        # the process to sleep
        #
        # and errors
        while not self.quit:

            # Save the time
            self.time_start()


            # Retrieve the image from Camera
            # Detect human faces
            # Blur faces
            # Save picture in folder
            try:
                img = Camera.getPicture( self.logger, link )

                rects = PictureProcessing.detectFaces( self.logger, img )
                img = PictureProcessing.smoothFaces( rects, img )
                PictureProcessing.savePicture( self.logger, floucaptFolder, img )

            except Exception, e:
                # if an error occurs, we write the error code in the file picture.txt
                PictureProcessing.writeTxtFileError(self.logger, floucaptFolder, e.args[0] )
                del e   # Delete variable from memory



            # Delete variables in memory
            try:
                del img
                del rects
            except NameError:
                pass


            # Call the cleaner
            # If the date has not changed, it does nothing
            self.cleaner.run(floucaptFolder)


            # If the quit signal was not sent
            # Then the application wait before the next contact of camera
            if not self.quit:
                self.time_diff()
Example #23
		global IP_ADDRESS, PORT
		data = {}
		data['ip_address'] = IP_ADDRESS
		data['port'] = PORT
		data['endpoints'] = []
		data['endpoints'].append('/devices')
		data['endpoints'].append('/users')
		data['endpoints'].append('/services')
		data = json.dumps(data)
		return data

if __name__ == "__main__":
	conf = {
		'/' : {
			'request.dispatch' : cherrypy.dispatch.MethodDispatcher(),
		}
	}
	
	cherrypy.tree.mount(BrokerInfo(), '/', conf)
	cherrypy.tree.mount(DeviceManager.DeviceManager(), '/devices', conf)
	cherrypy.tree.mount(ServiceManager.ServiceManager(), '/services', conf)
	cherrypy.tree.mount(UserManager.UserManager(), '/users', conf)

	cherrypy.config.update({
		'server.socket_host' : IP_ADDRESS,
		'server.socket_port' : PORT
		})

	Cleaner.Cleaner(0, 'cleaner_thread', 0).start()
	cherrypy.engine.start()
	cherrypy.engine.block()
Example #24
# import the libraries
import csv
import Cleaner
import sys

csv.field_size_limit(sys.maxsize)  # fixes the field size overflow problem

with open('File_Parsered.csv', 'rt', encoding='utf8') as f, \
        open('/Users/robertopenna/Desktop/Archivio/UNIMIB/Stage/JST-master/data/MR.dat', 'wt', encoding='utf8') as d:
    csv_f = csv.reader(f)
    next(csv_f)

    for row in csv_f:

        idtweet = row[0]
        string = row[1].lower()
        string_clean = Cleaner.clean(string)
        string_noTW = Cleaner.remove_stopW(string_clean)
        string_fin = string_noTW.replace('é', 'e').replace('ò', 'o').replace('è', 'e').replace('à', 'a').replace('ù', 'u')

        if string_fin != "":
            d.write('Tweet' + idtweet + ' ' + string_fin + '\n')
Example #25
def main():
    config = configparser.ConfigParser()
    config.read("../config.ini")
    config.sections()
    config_reader = config['DEFAULT']

    # Count of elements in pkl files
    max_in_file = int(config_reader['count'])

    instances_ngrams_last_dict_index = int(config_reader['insDicLast'])
    patterns_ngrams_last_dict_index = int(config_reader['patDicLast'])

    # Flag for using morph info
    use_morph = False
    if (int(config_reader['morph']) == 1):
        use_morph = True

    # Initialising dictionaries for storing ngrams in RAM
    ins_ngrams = dict()
    pat_ngrams = dict()
    ins_length = 0
    pat_length = 0

    # ngrams_mode for ngrams calculation
    ngrams_mode = int(config_reader['ngrams'])

    username = config_reader['u']
    password = config_reader['p']
    now_category = config_reader['c']

    connect_to_database(username, password, "localhost", 27017, now_category)

    # Extracting initial ontology
    if (int(config_reader['dontinit']) != 1):
        inizialize()

    if now_category == "all":
        now_category = ""

    # getting text from files and building indexes
    if not (int(config_reader['dontindex']) == 1):
        TextProcesser.build_indexes_sceleton(db)
        TextProcesser.preprocess_files(db, now_category)

    # Really fast method: keeps n-grams in RAM. Use it when the texts are not too large.
    if ngrams_mode == 1:
        pat_ngrams = TextProcesser.calc_ngrams_pat(db)
        print('pat_ngrams_length=' + str(len(pat_ngrams)))
        ins_ngrams = TextProcesser.calc_ngrams_instances(db)
        print('ins_ngrams_length=' + str(len(ins_ngrams)))

    # method using pkl files.
    if ngrams_mode == 2:
        pat_length = TextProcesser.ngrams_patterns_pkl(
            db, max_in_file, patterns_ngrams_last_dict_index, now_category)
        ins_length = TextProcesser.ngrams_instances_pkl(
            db, max_in_file, instances_ngrams_last_dict_index, now_category)

    iters = int(config_reader['i']) + 1

    threshold_mode = int(config_reader['tMode'])
    threshold_k_factor = float(config_reader['tK'])
    fixed_threshols_between_zero_and_one = float(config_reader['tT'])
    threshold_fixed_n = int(config_reader['tN'])

    for iteration in range(1, iters):
        startTime = time.time()
        print('Iteration [%s] begins' % str(iteration))
        logging.info('=============ITERATION [%s] BEGINS=============' %
                     str(iteration))
        InstanceExtractor.extract_instances(db, iteration, use_morph)
        InstanceExtractor.evaluate_instances(
            db, fixed_threshols_between_zero_and_one, threshold_mode,
            threshold_k_factor, threshold_fixed_n, iteration, ins_ngrams,
            ngrams_mode, ins_length, now_category)
        PatternExtractor.extract_patterns(db, iteration)
        PatternExtractor.evaluate_patterns(
            db, fixed_threshols_between_zero_and_one, threshold_mode,
            threshold_k_factor, threshold_fixed_n, iteration, pat_ngrams,
            ngrams_mode, pat_length, now_category)
        Cleaner.zero_coocurence_count(db)
        SubPatterns.filter_all_patterns(db)
        print('Iteration time: {:.3f} sec'.format(time.time() - startTime))
Example #26
else:
    import USCCrawler2 as USCC
    List = USCC.USCCrawl()

#Either launch the USC cleaner or load a checkpoint file if one exists.

if os.path.exists(Setup.jsonFile):
    #Load a dataframe from a json file.
    import Setup
    jsonFile = Setup.jsonFile
    uscDF = pd.read_json(jsonFile, orient='index')
else:

    import Cleaner
    #Run Cleaning script to generate dictionary
    Dictionary = Cleaner.Clean(List)
    #Convert dictionary into pandas dataframe.
    uscDF = pd.DataFrame.from_dict(Dictionary, orient='index')
    print("Cleaned Data")

#Either launch the UCLA Crawler and cleaner or load a checkpoint file if one exists.

if os.path.exists(Setup.UCLAClean):
    import json
    jsonFile = Setup.UCLAClean
    with open(jsonFile, 'r') as inFile:
        UCLADict = json.load(inFile)
else:
    import UCLACleaner
    import UCLAScraper
Example #27
seed        = 777 #For reproducibility

opt_adam = Adam(lr = learn_rate, beta_1 = beta_1, beta_2 = beta_2, epsilon = epsilon, decay = decay_rate, amsgrad = amsgrad)

def fetch_profiles(filename, n):
    f           = open(filename, 'r')
    profiles    = f.read().splitlines()
    f.close()
    return(list(set(profiles[:n])))

sqlite_file = '../../data/database/deeplearning.sqlite'
profilename = '../../data/profiles.txt'
table_name  = 'tweets'
profiles    = fetch_profiles(profilename, 15)
profiles    = [p.strip('@') for p in profiles]
cd          = c.CleanData(sqlite_file, table_name)
q           = 'SELECT * FROM {} WHERE AUTHOR IN ("{}");'.format(table_name, '", "'.join(profiles))

word_model = Word2Vec.load("word2vec.model")

np.random.seed(seed)

def word2idx(word):
  return word_model.wv.vocab[word].index
def idx2word(idx):
  return word_model.wv.index2word[idx]

cd.set_table(q)
raw_data = cd.get_clean_table()
raw_data = raw_data.CleanText.values
data = ''
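A small usage sketch for the two index helpers defined above; it assumes the word 'hello' exists in the vocabulary of the loaded word2vec.model, which is an assumption about the trained data.

# Round-trip a vocabulary word through the helpers defined above.
i = word2idx('hello')          # vocabulary index of the word
assert idx2word(i) == 'hello'  # maps the index back to the same word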
Example #28
    def __init__(self):
        cleaner = Cleaner.getClean()
        data = {}
        data = cleaner.cleaner()

        GUI.GUI(data)
Example #29
class_setter = {
    'rt': True,
    'hashtag': True,
    'mention': True,
    'polytonic': True,
    'links': True,
    'numbers': True,
    'only_alpha': True,
    'consecutives': True,
    'stopWords': True,
    'lower': True,
    'punctuation': True
}

# Load Cleaner -- TO IMPLEMENT
cleaner = Cleaner(class_setter)

# Load Greek core from spacy
nlp = spacy.load('el_core_news_md')


def init_greek_lexicon(greek_sentiment_terms):
    greek_lexicon = {}

    for term in greek_sentiment_terms:
        term_sentiment = term['sentiment']

        greek_lexicon[term['_id']] = {
            'positive': term_sentiment['PosScore'],
            'negative': term_sentiment['NegScore'],
            'objective': term_sentiment['ObjScore']
Example #30
        data = {}
        data['ip_address'] = IP_ADDRESS
        data['port'] = PORT
        data['endpoints'] = []
        data['endpoints'].append('/devices')
        data['endpoints'].append('/users')
        data['endpoints'].append('/services')
        data = json.dumps(data)
        return data


if __name__ == "__main__":
    conf = {
        '/': {
            'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
        }
    }

    cherrypy.tree.mount(BrokerInfo(), '/', conf)
    cherrypy.tree.mount(DeviceManager.DeviceManager(), '/devices', conf)
    cherrypy.tree.mount(ServiceManager.ServiceManager(), '/services', conf)
    cherrypy.tree.mount(UserManager.UserManager(), '/users', conf)

    cherrypy.config.update({
        'server.socket_host': IP_ADDRESS,
        'server.socket_port': PORT
    })

    Cleaner.Cleaner()
    cherrypy.engine.start()
    cherrypy.engine.block()
Example #31
from Scraper import ScrapperClass
import Text2File

import Cleaner


sc= ScrapperClass()
myurl=sc.getUrl()

if(sc.validateUrl(myurl)):
	myreq=sc.getRequestToTheUrl(myurl)
	sc.scrapedContent(myreq)
	title=sc.getTitle()
	paragraph=sc.getParagraph()
	paragraph=Cleaner.textCleaner(paragraph)
	title=Cleaner.titleCleaner(title)
	Text2File.Text2File(title,paragraph)
else:
	print("Enter Correct Wikipedia Url")
	print(input())


Example #32
import Cleaner
from jointsMap import Joints
import matplotlib.pyplot as plt
import math
import numpy as np
import LPF
import periodAnalysisUtils

file = 'inputs/assaf_45.skl'
joint = Joints.KneeLeft_X
#Cleaner.plotJointCentered(file, joint)
parts = Cleaner.plotJointCenteredPeriodicaly(file, joint)
dirty_fig = plt.figure()
without_outliers_fig = plt.figure()
clean_fig = plt.figure()
clean_and_wo = plt.figure()
for part in parts:
    frameSize = math.ceil(np.sqrt(len(parts)))
    dirty_sub = dirty_fig.add_subplot(frameSize*110 + parts.index(part)+1)
    time = zip(*part)[0]
    values = zip(*part)[1]
    dirty_sub.plot(time, values)
    
    dropped_values, dropped_time = periodAnalysisUtils.dropOutliers(values, time)
    wo_sub = without_outliers_fig.add_subplot(frameSize*110 + parts.index(part)+1)
    wo_sub.plot(dropped_time, dropped_values)
    
    clean_values, clean_time =  LPF.clean(values, time)
    clean_sub = clean_fig.add_subplot(frameSize*110 + parts.index(part)+1)
    clean_sub.plot(clean_time, clean_values)
    
Example #33
import pandas as pd
import Cleaner
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier
import FeatureSelector

# read csv

star_wars = pd.read_csv("star_wars.csv", encoding="ISO-8859-1")

# clean data

star_wars = Cleaner.clean(star_wars)

# split into train and test data
star_wars_train = star_wars[:-200]
star_wars_test = star_wars[-200:]


# Initialize our algorithm with the default parameters
# n_estimators is the number of trees we want to make
# min_samples_split is the minimum number of rows we need to make a split
# min_samples_leaf is the minimum number of samples we can have at the place where a tree branch ends (the bottom points of the tree)
alg = RandomForestClassifier(random_state=1, n_estimators=10, min_samples_split=2, min_samples_leaf=1)

# Set predictors
predictors = ["SeenSW", "IsStarTrekFan", "Gender", "Age", "Income", "Education", "Location"]

# uncomment to check what features to use
# FeatureSelector.check(star_wars, predictors)
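The snippet imports cross_validation but does not use it above; here is a hedged sketch of how the forest could be scored with it on the training rows. The label column name "FanScore" is purely hypothetical and must be replaced with the real target column of the data set.

# Hedged sketch: 3-fold cross-validation of the classifier defined above.
# "FanScore" is a hypothetical target column name, not from the original data.
scores = cross_validation.cross_val_score(
    alg, star_wars_train[predictors], star_wars_train["FanScore"], cv=3)
print(scores.mean())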