def Validate_Birthday(self, Given_Birthday, Given_Age):
    output = False
    error = ""
    clean = Cleaner()
    result = clean.Clean_Birthday(Given_Birthday)
    # Checks to see if the birthday was cleaned
    if result[0] is not None:
        if result[1] == "":
            date_Details = result[0].split("-")
            str_Day = date_Details[0]
            str_Month = date_Details[1]
            str_Year = date_Details[2]
            try:
                given_Birth_Date = datetime.date(int(str_Year), int(str_Month), int(str_Day))
                today = datetime.datetime.now()
                should_Be_Age = today.year - given_Birth_Date.year - (
                    (today.month, today.day) < (given_Birth_Date.month, given_Birth_Date.day))
                if should_Be_Age == Given_Age:
                    output = True
                else:
                    error = "The age given and the birthday do not line up"
            except:
                error = "Birthday is not a valid date"
        else:
            error = result[1]
    else:
        error = "Birthday was not in a logical format"
    return output, error
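# Illustrative note (not part of the original module): the age check above uses the
# common Python idiom of subtracting a boolean, since
# (today.month, today.day) < (birth.month, birth.day) evaluates to True (i.e. 1) only
# when the birthday has not yet occurred this year. A minimal, self-contained sketch:
import datetime

def age_on(birth_date, on_date):
    """Return the age in whole years on `on_date` for someone born on `birth_date`."""
    return on_date.year - birth_date.year - (
        (on_date.month, on_date.day) < (birth_date.month, birth_date.day))

# Example: someone born 2000-06-15 is still 22 the day before their birthday in 2023.
assert age_on(datetime.date(2000, 6, 15), datetime.date(2023, 6, 14)) == 22
assert age_on(datetime.date(2000, 6, 15), datetime.date(2023, 6, 15)) == 23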
def mkfs(self, args):
    str = ""
    if len(args) > 2:
        str += "Usage: mkfs [-reuse]"
        return str
    brandnew = True
    if len(args) > 1:
        if args[1] == "-reuse":
            brandnew = False
        else:
            str += "Usage: mkfs [-reuse]"
            return str
    segmentmonitor = Cleaner.SegmentMonitor()
    Disk.disk = DiskClass(brandnew=brandnew)
    Segment.segmentmanager = SegmentManagerClass(segmentmonitor)
    InodeMap.inodemap = InodeMapClass()
    LFS.filesystem = LFSClass(initdisk=brandnew)
    if CLEANERFLAG:
        Cleaner.cleaner = Cleaner.CleanerClass(segmentmonitor)
        Cleaner.cleaner.start()  # Start the cleaner thread
    if brandnew:
        Inode.inodeidpool = 1  # Reset to 1 because mkfs might be run multiple times
        rootinode = Inode(isdirectory=True)  # Create the root inode here
    else:
        LFS.filesystem.restore()
    return "1"
def main():
    description = "Cleans up old backups to leave more room on the backup server." \
        "\n\nE.g. python cleaner.py -p /path/to/archive -o 3:4 7:7." \
        "\n\nThe example provided will keep an archive from every 4th day if it's more than 3 days old" \
        " and archive every 7 days if it's more than a week old." \
        "\n\nThe format of backups this script takes is BACKUP_SET-VERSION."
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('-p', '--root-path', type=str, required=True,
                        help='The root path of your backups.')
    parser.add_argument(
        '-o', '--options', type=str, required=True, nargs='*',
        help='Your age threshold and desired interval size separated by a colon')
    parser.add_argument('-f', '--force', action='store_true',
                        help='Automatically confirms that you want to delete.')
    args = parser.parse_args()

    calc = Calculator(args.root_path, args.options, args.force)
    calc.calculate()
    cleaner = Cleaner(calc)
    cleaner.clean()
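# Illustrative sketch (not from the original project): the '-o' values are
# 'AGE:INTERVAL' strings such as '3:4' or '7:7'. The Calculator that consumes them
# is not shown here; `parse_option` below is a hypothetical helper that only
# demonstrates the assumed format.
def parse_option(option):
    """Split an 'AGE:INTERVAL' string into an (age_threshold, interval) pair of ints."""
    age, interval = option.split(':')
    return int(age), int(interval)

# Example: ['3:4', '7:7'] -> [(3, 4), (7, 7)]
print([parse_option(o) for o in ['3:4', '7:7']])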
def run():
    # Set home directory
    homedir = os.path.dirname(os.path.realpath(__file__))
    # Get full path to reference genome file (must be in files folder)
    referencefilepath = Functions.parent_dir(homedir) + '/files/hg19_whole_genome.fa'
    # Get full path to bidirectional hits file (must be in files folder)
    bidirectionalfilepath = Functions.parent_dir(homedir) + '/files/bidirectional_hits.merge.bed'
    # Get full path to motif database for tomtom (must be in files folder)
    tomtomdir = Functions.parent_dir(homedir) + '/files/HOCOMOCOv9_AD_MEME.txt'

    if boolean == True:
        print "Cleaning directory..."
        # Deletes all files and folders in given directory/TF/peak_files
        cl.run(directory)

    print "running main\npreparing files for MEME..."
    # Bedtools intersect on all *.bed* files, then bedtools merge to ensure non-overlapping intervals
    rc.run(directory)
    # Converts ConsolidatedPeak.merge.bed to ConsolidatedPeak.merge.fasta
    b2f.run(directory, referencefilepath)

    print "done\nrunning MEME..."
    # Runs MEME, FIMO, and TOMTOM on all ConsolidatedPeak.merge.fasta
    meme.run(directory, 10000000, 10000000, tomtomdir)

    print "done\nfixing FIMO files..."
    # Removes duplicates, orders, and eliminates first column of FIMO output files
    ff.run(directory)

    print "done\ngetting motif distances to i..."
    # Calculates motif distance to bidir center for each motif of each TF
    dist.run(directory, bidirectionalfilepath, homedir)

    print "done\ngenerating overlap numbers..."
    # Determines site overlap between bidir, ChIP, and FIMO sites
    so.run(directory, bidirectionalfilepath, homedir)

    print "done"
def __init__(self):
    Cleaner.main()
    super(MainWindow, self).__init__()
    self.setGeometry(50, 50, 1200, 800)
    self.setWindowTitle("ReForm IT")
    session_id = QLabel(self)
    session_id.setText("Session_key:" + str(MainWindow.sk))
    session_id.move(800, 0)
    session_id.resize(300, 50)
    self.startWindow1()
def initUI(self):
    newfont = QFont("Times", 22, QFont.Bold)
    self.finalLabel = QLabel(self)
    self.finalLabel.setText(
        "Your data will soon be uploaded to the client database. Please don't close the application.")
    self.finalLabel.setFont(newfont)
    self.finalLabel.move(200, 300)
    self.finalLabel.resize(1000, 70)
    Uploader.main(MainWindow.form_id, MainWindow.sk)
    Cleaner.main()
    self.finalLabel.setText("Data Uploaded! You may now exit!")
def Validate_Age(self, Given_Age):
    clean = Cleaner()
    result = clean.Clean_Age(Given_Age)
    error = ""
    output = False
    # Checks to see if the Cleaner could clean the Given_Age
    if result[0] is not None:
        current_Age = result[0]
        # Checks to see if current_Age is within the 0-99 range
        if 0 <= current_Age <= 99:
            output = True
        else:
            error = "Age not between 0 and 99"
    else:
        error = result[1]
    return output, error
def add_section(self, cleaner_id, name):
    """Add a section (cleaners)"""
    self.cleaner_ids.append(cleaner_id)
    self.cleaners[cleaner_id] = Cleaner.Cleaner()
    self.cleaners[cleaner_id].id = cleaner_id
    self.cleaners[cleaner_id].name = name
    self.cleaners[cleaner_id].description = _('Imported from winapp2.ini')
def run(self):
    """
    Start processing.
    """
    # parse the command line arguments and set logging options
    try:
        self.args = self.parser.parse_args()
        self.configureLogging()
        self.logger.info("Started with {0}".format(' '.join(sys.argv[1:])))
    except Exception as e:
        self.parser.print_help()
        sys.exit(e)

    # load the configuration file
    try:
        with open(self.args.config) as f:
            self.config.readfp(f)
    except Exception as e:
        self.logger.critical("Could not load the specified configuration file")
        sys.exit(e)

    # set options
    Cfg.LOG_EXC_INFO = self.args.trace

    # execute commands
    with Timer.Timer() as t:
        if self.args.crawl:
            import Crawler
            Crawler.crawl(self.config, self.args.update)
        if self.args.clean:
            import Cleaner
            Cleaner.clean(self.config, self.args.update)
        if self.args.infer:
            import Facter
            Facter.infer(self.config, self.args.update)
        if self.args.graph:
            import Grapher
            Grapher.graph(self.config, self.args.update)
        if self.args.transform:
            import Transformer
            Transformer.transform(self.config)
        if self.args.post:
            import Poster
            Poster.post(self.config)
        if self.args.analyze:
            import Analyzer
            Analyzer.analyze(self.config, self.args.update)
    self.logger.info("Indexer finished in {0}:{1}:{2}".format(t.hours, t.minutes, t.seconds))
def create_training_data():
    print("Loading articles... This may take a while")
    t_start = time.time()
    articles = []
    for root, dirnames, filenames in os.walk('./Articles'):
        for filename in fnmatch.filter(filenames, '*.txt'):
            articles.append(os.path.join(root, filename))
    print("Loading articles complete. Took {0} seconds...".format(time.time() - t_start))

    # Questions
    # Q1
    in_random_articles = input("Use random articles? [y/N]")
    if in_random_articles == "y":
        random.shuffle(articles)
        in_random_articles = True
    else:
        in_random_articles = False

    # Q2
    in_clean_file = input("Clean articles [Y/n]")
    if in_clean_file == "n":
        in_clean_file = False
    else:
        in_clean_file = True

    # Q3
    in_num_articles = input("Number of articles? [Default: 10]")
    try:
        num_articles = int(in_num_articles)
    except ValueError:
        num_articles = 10

    selected_articles = articles[0:min(len(articles), num_articles)]

    try:
        os.mkdir("./Training")
    except OSError:
        pass

    training_filename = "Training-{0}-{1}-{2}-{3}.txt".format(
        "Clean" if in_clean_file else "Dirty",
        "Shuffle" if in_random_articles else "Iterate",
        num_articles,
        str(uuid.uuid4())[:8])

    for article in selected_articles:
        with codecs.open("./Training/" + training_filename, "a+", encoding="utf8") as file:
            with codecs.open(article, 'r', encoding="utf8") as f:
                content = f.read()
                if in_clean_file:
                    content = Cleaner.clean(content)
                file.write(content)

    print("Created Training set named: {0}".format(training_filename))
def gen_small_output(title, location, company, date, thread):
    """Format the output dictionary.

    Args:
    ----
        title: Selenium WebElement
        location: Selenium WebElement
        company: Selenium WebElement
        date: str
        thread: RequestThreadInfo object

    Return:
    ------
        new_json: dict
    """
    thread.join()

    new_json = {}
    new_json['nom_du_poste'] = title.text
    new_json['entreprise'] = company.text
    new_json['date_publication'] = date
    try:
        lieu = Cleaner.arrondissement_paris(location.text, thread.posting_txt)
        new_json['lieu'] = lieu
    except:
        pass
    try:
        salaire, contrat = Cleaner.parser(thread.posting_txt)
        new_json['salaire'] = salaire
        new_json['type_de_contrat'] = contrat
    except:
        pass
    try:
        new_json['tags'] = Cleaner.tags(thread.posting_txt)
    except:
        pass

    return new_json
def cb_wipe_free_space(self, action):
    """callback to wipe free space in arbitrary folder"""
    path = GuiBasic.browse_folder(self.window,
                                  _("Choose a folder"),
                                  multiple=False,
                                  stock_button=gtk.STOCK_OK)
    if not path:
        # user cancelled
        return
    backends["_gui"] = Cleaner.create_wipe_cleaner(path)
    # execute
    operations = {"_gui": ["free_disk_space"]}
    self.preview_or_run_operations(True, operations)
def main(id, key):
    id = int(id)
    # use creds to create a client to interact with the Google Drive API
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive'
    ]
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        'client_secret.json', scope)
    client = gspread.authorize(creds)

    txt = list(np.load("results.npy"))
    a, b = [], []
    for t in txt:
        for q in t:
            a.append(q)
        b.append(a)
        a = []

    # Find a workbook by name and open the first sheet.
    # Make sure you use the right name here.
    if id == 0:
        sheet = client.open("GenericForm").sheet1
    elif id == 1:
        sheet = client.open("MedicalForm").sheet1

    #for k in b:
    #    print(type(b), b)
    #    sheet.insert_row(k, 2)
    sheet.insert_row([key, ], 2)
    #print(r)

    # Extract and print all of the values
    #list_of_hashes = sheet.get_all_records()
    #print(list_of_hashes)
    #row = ["I'm","inserting","a","row","into","a,","Spreadsheet","with","=IMAGE('FromPhone\\img2.jpg')"]
    #sheet.insert_row(row, 1)

    Cleaner.main()
def preprocessing(StemmedDict, fileName):
    v = set()
    f = open(fileName, 'r')
    for line in f:
        line = line.strip()
        #print line
        lineCleaned = Cleaner.getProcessedData(line, 1)
        #print lineCleaned
        Id = lineCleaned.split('\x01')[0]
        lineStem = StemmerClass.Stemmer()
        if Id not in v:
            v.add(Id)
            StemmedDict[Id] = lineStem.getStemmedCorpus(lineCleaned)
def shred_paths(self, paths):
    """Shred file or folders"""
    # create a temporary cleaner object
    backends['_gui'] = Cleaner.create_simple_cleaner(paths)

    # preview and confirm
    operations = {'_gui': ['files']}
    self.preview_or_run_operations(False, operations)

    if GuiBasic.delete_confirmation_dialog(self.window, mention_preview=False):
        # delete
        self.preview_or_run_operations(True, operations)
        return True
    return False
def cb_wipe_free_space(self, action):
    """callback to wipe free space in arbitrary folder"""
    path = GuiBasic.browse_folder(self.window,
                                  _("Choose a folder"),
                                  multiple=False,
                                  stock_button=gtk.STOCK_OK)
    if not path:
        # user cancelled
        return
    backends['_gui'] = Cleaner.create_wipe_cleaner(path)
    # execute
    operations = {'_gui': ['free_disk_space']}
    self.preview_or_run_operations(True, operations)
def createFeature(fileName, docRumourScore, docFactScore, classLabel):
    f = open(fileName, 'r')
    rmax = max(docRumourScore.itervalues())
    rmin = min(docRumourScore.itervalues())
    lmax = max(docFactScore.itervalues())
    lmin = min(docFactScore.itervalues())
    for line in f:
        line = line.strip()
        #print line
        lineCleaned = Cleaner.getProcessedData(line, 1)
        data = lineCleaned.split('\x01')
        id = data[0]
        rumorScore = docRumourScore[id]
        factScore = docFactScore[id]
        licence = 0 if data[3] == 'false' else 1
        definition = 0 if data[4] == 'sd' else 1
        views = float(data[5])
        print id + ',' + str((rumorScore - rmin) / rmax) + ',' + str((factScore - lmin) / lmax) + ',' \
            + str(licence) + ',' + str(definition) + ',' + str(float(data[6]) / views) + ',' \
            + str(float(data[7]) / views) + ',' + str(float(data[8]) / views) + ',' \
            + str(float(data[9]) / views) + ',' + str(classLabel)
def scrape_job_page(driver, job_title, job_location):
    """Scrape a page of jobs from Monster.

    Grab everything that is possible (or relevant) for each of the jobs
    posted for a given page. This will typically include the job title, job
    location, posting company, the date posted, and the posting text.

    Args:
    ----
        driver: Selenium webdriver
        job_title: str
        job_location: str
    """
    global job_list
    titles, locations, companies, dates, hrefs = query_for_data(driver)

    current_date = str(datetime.datetime.now(pytz.timezone('Europe/Paris')))
    json_dct = {'search_title': job_title,
                'search_location': job_location,
                'search_date': current_date,
                'job_site': 'monster'}

    thread_lst = []
    for href in hrefs:
        try:
            thread = HrefQueryThread(href.get_attribute('href'))
        except:
            print('Exception in href thread builder')
            thread = HrefQueryThread('')
        thread_lst.append(thread)
        thread.start()

    for title, location, company, date, thread in \
            zip(titles, locations, companies, dates, thread_lst):
        date_txt = Cleaner.date_exacte(date.text)
        try:
            small_dict = gen_small_output(title, location, company, date_txt, thread)
        except:
            print('Missed element in Monster!')
        try:
            job_list.append(small_dict)
        except IOError as err:
            print(err)
def __init__(self, pathname, xlate_cb=None):
    """Create cleaner from XML in pathname.

    If xlate_cb is set, use it as a callback for each translatable string.
    """
    self.action = None
    self.cleaner = Cleaner.Cleaner()
    self.option_id = None
    self.option_name = None
    self.option_description = None
    self.option_warning = None
    self.xlate_cb = xlate_cb
    if self.xlate_cb is None:
        self.xlate_cb = lambda x, y=None: None  # do nothing
    dom = xml.dom.minidom.parse(pathname)
    self.handle_cleaner(dom.getElementsByTagName('cleaner')[0])
import CaptchaGetter as CG
import CropImages as CI
import Cleaner as CL
import os

img_extension = 'png'

captcha_getter_obj = CG.CaptchaGetter(100, img_extension, '../')
path_to_captcha_imgs = captcha_getter_obj.get_dump_path()
captcha_getter_obj.dump_images()

img_cropper = CI.CropImages(path_to_captcha_imgs, img_extension, '../')
img_cropper.crop_and_save_images()
cropped_images_path = img_cropper.get_cropped_images_path()

img_cleaner = CL.Cleaner(cropped_images_path, img_extension, '../')
img_cleaner.clean_images()

# ================ CREATES LABELLED_DATA FOLDER ================
labelled_images_path_root = os.path.abspath('../labelled_data')
if not os.path.isdir(labelled_images_path_root):
    os.mkdir(labelled_images_path_root)

for i in range(10):
    if not os.path.isdir(os.path.join(labelled_images_path_root, str(i))):
        os.mkdir(os.path.join(labelled_images_path_root, str(i)))

# After this, manual labelling is needed: sort the files into their folders under the 'labelled_data' folder.
def __init__(self):
    Daemon.__init__(self,
                    pidfile='/tmp/floucapt.pid',
                    stdout='/tmp/floucapt.log',
                    stderr='/tmp/floucapt.error')
    self.quit = False
    self.logger = Logger()
    self.cleaner = Cleaner()
class DaemonImpl(Daemon):
    """Concrete implementation of the abstract daemon."""

    def __init__(self):
        Daemon.__init__(self,
                        pidfile='/tmp/floucapt.pid',
                        stdout='/tmp/floucapt.log',
                        stderr='/tmp/floucapt.error')
        self.quit = False
        self.logger = Logger()
        self.cleaner = Cleaner()

    def signal_handler(self, signal, frame):
        self.quit = True

    def time_start(self):
        """Save the start time of the current iteration.

        This value is used by time_diff() to compute the next pause.
        """
        self.startTime = time.time()

    def time_diff(self):
        """Compute the remaining time before the next camera contact, then sleep.

        With this, the real frequency of contact with the camera is exactly
        the same as defined in the config file.
        """
        endTime = time.time()
        elapsed = endTime - self.startTime
        pause = self.freqPictures - elapsed
        if pause > 0:
            time.sleep(pause)

    def run(self):
        # Redirect signals to close the application properly
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

        # Load the configuration from the config file
        self.freqPictures, link, floucaptFolder = loadConfig(self.logger)

        # Main loop: handles contact with the camera, processing and recording
        # the image, the call to the cleaner, sleeping, and errors.
        while not self.quit:
            # Save the start time
            self.time_start()

            # Retrieve the image from the camera, detect human faces,
            # blur them, and save the picture in the folder
            try:
                img = Camera.getPicture(self.logger, link)
                rects = PictureProcessing.detectFaces(self.logger, img)
                img = PictureProcessing.smoothFaces(rects, img)
                PictureProcessing.savePicture(self.logger, floucaptFolder, img)
            except Exception as e:
                # If an error occurs, write the error code in the file picture.txt
                PictureProcessing.writeTxtFileError(self.logger, floucaptFolder, e.args[0])
                del e

            # Delete variables in memory
            try:
                del img
                del rects
            except NameError:
                pass

            # Call the cleaner; if the date has not changed, it does nothing
            self.cleaner.run(floucaptFolder)

            # If the quit signal was not sent, wait before the next camera contact
            if not self.quit:
                self.time_diff()
    global IP_ADDRESS, PORT
    data = {}
    data['ip_address'] = IP_ADDRESS
    data['port'] = PORT
    data['endpoints'] = []
    data['endpoints'].append('/devices')
    data['endpoints'].append('/users')
    data['endpoints'].append('/services')
    data = json.dumps(data)
    return data


if __name__ == "__main__":
    conf = {
        '/': {
            'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
        }
    }
    cherrypy.tree.mount(BrokerInfo(), '/', conf)
    cherrypy.tree.mount(DeviceManager.DeviceManager(), '/devices', conf)
    cherrypy.tree.mount(ServiceManager.ServiceManager(), '/services', conf)
    cherrypy.tree.mount(UserManager.UserManager(), '/users', conf)
    cherrypy.config.update({
        'server.socket_host': IP_ADDRESS,
        'server.socket_port': PORT
    })

    Cleaner.Cleaner(0, 'cleaner_thread', 0).start()

    cherrypy.engine.start()
    cherrypy.engine.block()
# Import the libraries
import csv
import Cleaner
import sys

csv.field_size_limit(sys.maxsize)  # fixes the field-size overflow problem

with open('File_Parsered.csv', 'rt', encoding='utf8') as f, \
        open('/Users/robertopenna/Desktop/Archivio/UNIMIB/Stage/JST-master/data/MR.dat', 'wt', encoding='utf8') as d:
    csv_f = csv.reader(f)
    next(csv_f)
    for row in csv_f:
        idtweet = row[0]
        string = row[1].lower()
        string_clean = Cleaner.clean(string)
        string_noTW = Cleaner.remove_stopW(string_clean)
        string_fin = string_noTW.replace('é', 'e').replace('ò', 'o').replace('è', 'e') \
                                .replace('à', 'a').replace('ù', 'u')
        if string_fin != "":
            d.write('Tweet' + idtweet + ' ' + string_fin + '\n')
def main():
    config = configparser.ConfigParser()
    config.read("../config.ini")
    config.sections()
    config_reader = config['DEFAULT']

    # Count of elements in pkl files
    max_in_file = int(config_reader['count'])
    instances_ngrams_last_dict_index = int(config_reader['insDicLast'])
    patterns_ngrams_last_dict_index = int(config_reader['patDicLast'])

    # Flag for using morph info
    use_morph = False
    if int(config_reader['morph']) == 1:
        use_morph = True

    # Initialising dictionaries for storing ngrams in RAM
    ins_ngrams = dict()
    pat_ngrams = dict()
    ins_length = 0
    pat_length = 0

    # ngrams_mode for ngrams calculation
    ngrams_mode = int(config_reader['ngrams'])

    username = config_reader['u']
    password = config_reader['p']
    now_category = config_reader['c']
    connect_to_database(username, password, "localhost", 27017, now_category)

    # Extracting initial ontology
    if int(config_reader['dontinit']) != 1:
        inizialize()
    if now_category == "all":
        now_category = ""

    # Getting text from files and building indexes
    if not int(config_reader['dontindex']) == 1:
        TextProcesser.build_indexes_sceleton(db)
        TextProcesser.preprocess_files(db, now_category)

    # Really fast method: saves ngrams in RAM. Use it in case of not too large texts.
    if ngrams_mode == 1:
        pat_ngrams = TextProcesser.calc_ngrams_pat(db)
        print('pat_ngrams_length=' + str(len(pat_ngrams)))
        ins_ngrams = TextProcesser.calc_ngrams_instances(db)
        print('ins_ngrams_length=' + str(len(ins_ngrams)))
    # Method using pkl files.
    if ngrams_mode == 2:
        pat_length = TextProcesser.ngrams_patterns_pkl(
            db, max_in_file, patterns_ngrams_last_dict_index, now_category)
        ins_length = TextProcesser.ngrams_instances_pkl(
            db, max_in_file, instances_ngrams_last_dict_index, now_category)

    iters = int(config_reader['i']) + 1
    threshold_mode = int(config_reader['tMode'])
    threshold_k_factor = float(config_reader['tK'])
    fixed_threshold_between_zero_and_one = float(config_reader['tT'])
    threshold_fixed_n = int(config_reader['tN'])

    for iteration in range(1, iters):
        startTime = time.time()
        print('Iteration [%s] begins' % str(iteration))
        logging.info('=============ITERATION [%s] BEGINS=============' % str(iteration))
        InstanceExtractor.extract_instances(db, iteration, use_morph)
        InstanceExtractor.evaluate_instances(
            db, fixed_threshold_between_zero_and_one, threshold_mode,
            threshold_k_factor, threshold_fixed_n, iteration, ins_ngrams,
            ngrams_mode, ins_length, now_category)
        PatternExtractor.extract_patterns(db, iteration)
        PatternExtractor.evaluate_patterns(
            db, fixed_threshold_between_zero_and_one, threshold_mode,
            threshold_k_factor, threshold_fixed_n, iteration, pat_ngrams,
            ngrams_mode, pat_length, now_category)
        Cleaner.zero_coocurence_count(db)
        SubPatterns.filter_all_patterns(db)
        print('Iteration time: {:.3f} sec'.format(time.time() - startTime))
else:
    import USCCrawler2 as USCC
    List = USCC.USCCrawl()

# Either launch the USC cleaner or load a checkpoint file if one exists.
if os.path.exists(Setup.jsonFile):
    # Load a dataframe from a json file.
    import Setup
    jsonFile = Setup.jsonFile
    uscDF = pd.read_json(jsonFile, orient='index')
else:
    import Cleaner
    # Run cleaning script to generate dictionary
    Dictionary = Cleaner.Clean(List)
    # Convert dictionary into pandas dataframe.
    uscDF = pd.DataFrame.from_dict(Dictionary, orient='index')
    print("Cleaned Data")

# Either launch the UCLA crawler and cleaner or load a checkpoint file if one exists.
if os.path.exists(Setup.UCLAClean):
    import json
    jsonFile = Setup.UCLAClean
    with open(jsonFile, 'r') as inFile:
        UCLADict = json.load(inFile)
else:
    import UCLACleaner
    import UCLAScraper
seed = 777  # For reproducibility

opt_adam = Adam(lr=learn_rate, beta_1=beta_1, beta_2=beta_2,
                epsilon=epsilon, decay=decay_rate, amsgrad=amsgrad)


def fetch_profiles(filename, n):
    f = open(filename, 'r')
    profiles = f.read().splitlines()
    f.close()
    return list(set(profiles[:n]))


sqlite_file = '../../data/database/deeplearning.sqlite'
profilename = '../../data/profiles.txt'
table_name = 'tweets'

profiles = fetch_profiles(profilename, 15)
profiles = [p.strip('@') for p in profiles]

cd = c.CleanData(sqlite_file, table_name)
q = 'SELECT * FROM {} WHERE AUTHOR IN ("{}");'.format(table_name, '", "'.join(profiles))

word_model = Word2Vec.load("word2vec.model")

np.random.seed(seed)


def word2idx(word):
    return word_model.wv.vocab[word].index


def idx2word(idx):
    return word_model.wv.index2word[idx]


cd.set_table(q)
raw_data = cd.get_clean_table()
raw_data = raw_data.CleanText.values
data = ''
def __init__(self):
    cleaner = Cleaner.getClean()
    data = {}
    data = cleaner.cleaner()
    GUI.GUI(data)
class_setter = {
    'rt': True,
    'hashtag': True,
    'mention': True,
    'polytonic': True,
    'links': True,
    'numbers': True,
    'only_alpha': True,
    'consecutives': True,
    'stopWords': True,
    'lower': True,
    'punctuation': True
}

# Load Cleaner -- TO IMPLEMENT
cleaner = Cleaner(class_setter)

# Load Greek core from spacy
nlp = spacy.load('el_core_news_md')


def init_greek_lexicon(greek_sentiment_terms):
    greek_lexicon = {}
    for term in greek_sentiment_terms:
        term_sentiment = term['sentiment']
        greek_lexicon[term['_id']] = {
            'positive': term_sentiment['PosScore'],
            'negative': term_sentiment['NegScore'],
            'objective': term_sentiment['ObjScore']
    data = {}
    data['ip_address'] = IP_ADDRESS
    data['port'] = PORT
    data['endpoints'] = []
    data['endpoints'].append('/devices')
    data['endpoints'].append('/users')
    data['endpoints'].append('/services')
    data = json.dumps(data)
    return data


if __name__ == "__main__":
    conf = {
        '/': {
            'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
        }
    }
    cherrypy.tree.mount(BrokerInfo(), '/', conf)
    cherrypy.tree.mount(DeviceManager.DeviceManager(), '/devices', conf)
    cherrypy.tree.mount(ServiceManager.ServiceManager(), '/services', conf)
    cherrypy.tree.mount(UserManager.UserManager(), '/users', conf)
    cherrypy.config.update({
        'server.socket_host': IP_ADDRESS,
        'server.socket_port': PORT
    })

    Cleaner.Cleaner()
    cherrypy.engine.start()
    cherrypy.engine.block()
from Scraper import ScrapperClass
import Text2File
import Cleaner

sc = ScrapperClass()
myurl = sc.getUrl()

if sc.validateUrl(myurl):
    myreq = sc.getRequestToTheUrl(myurl)
    sc.scrapedContent(myreq)
    title = sc.getTitle()
    paragraph = sc.getParagraph()
    paragraph = Cleaner.textCleaner(paragraph)
    title = Cleaner.titleCleaner(title)
    Text2File.Text2File(title, paragraph)
else:
    print("Enter Correct Wikipedia Url")

print(input())
import Cleaner
from jointsMap import Joints
import matplotlib.pyplot as plt
import math
import numpy as np
import LPF
import periodAnalysisUtils

file = 'inputs/assaf_45.skl'
joint = Joints.KneeLeft_X

#Cleaner.plotJointCentered(file, joint)
parts = Cleaner.plotJointCenteredPeriodicaly(file, joint)

dirty_fig = plt.figure()
without_outliers_fig = plt.figure()
clean_fig = plt.figure()
clean_and_wo = plt.figure()

for part in parts:
    frameSize = math.ceil(np.sqrt(len(parts)))

    dirty_sub = dirty_fig.add_subplot(frameSize * 110 + parts.index(part) + 1)
    time = zip(*part)[0]
    values = zip(*part)[1]
    dirty_sub.plot(time, values)

    dropped_values, dropped_time = periodAnalysisUtils.dropOutliers(values, time)
    wo_sub = without_outliers_fig.add_subplot(frameSize * 110 + parts.index(part) + 1)
    wo_sub.plot(dropped_time, dropped_values)

    clean_values, clean_time = LPF.clean(values, time)
    clean_sub = clean_fig.add_subplot(frameSize * 110 + parts.index(part) + 1)
    clean_sub.plot(clean_time, clean_values)
import pandas as pd
import Cleaner
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier
import FeatureSelector

# read csv
star_wars = pd.read_csv("star_wars.csv", encoding="ISO-8859-1")

# clean data
star_wars = Cleaner.clean(star_wars)

# split into train and test data
star_wars_train = star_wars[:-200]
star_wars_test = star_wars[-200:]

# Initialize our algorithm with the default parameters:
# n_estimators is the number of trees we want to make;
# min_samples_split is the minimum number of rows we need to make a split;
# min_samples_leaf is the minimum number of samples we can have at the place
# where a tree branch ends (the bottom points of the tree).
alg = RandomForestClassifier(random_state=1, n_estimators=10,
                             min_samples_split=2, min_samples_leaf=1)

# Set predictors
predictors = ["SeenSW", "IsStarTrekFan", "Gender", "Age", "Income",
              "Education", "Location"]

# uncomment to check what features to use
# FeatureSelector.check(star_wars, predictors)