def __init__(self, dataSource, dataSet):
    # Pick the extraction backend based on the requested data source.
    extractObj = Extract()
    if dataSource == 'api':
        self.data = extractObj.getAPISData(dataSet)
    elif dataSource == 'csv':
        self.data = extractObj.getCSVData(dataSet)
    else:
        # Fall back to a database lookup for any other source type.
        self.data = extractObj.databases(dataSet)
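# A minimal usage sketch of the constructor above. The class name DataLoader and
# the dataset identifier are assumptions for illustration only; only the
# dataSource/dataSet arguments come from the original code.
loader = DataLoader(dataSource='csv', dataSet='measurements.csv')
print(loader.data)  # whatever Extract.getCSVData() returned for that file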
def default(self, url1=None, url2=None, field=None):
    if not url1 or not url2 or not field:
        return dumps({'status': 'error', 'message': 'Need URIs and field'})
    extract1 = Extract(url1, datapath, cachepath, extractorpath)
    extract2 = Extract(url2, datapath, cachepath, extractorpath)
    data1 = extract1.run()
    data2 = extract2.run()
    chart = ComparisonChart(url1, url2, data1, data2)
    url = chart.get_graph_url_for_field(field)
    return dumps({'status': 'ok', 'url': url})
def process_image(self, image):
    file_converter = FileConverter()
    # Convert the PNG image to a JPEG byte string.
    image_bytes = file_converter.png_to_jpeg(image)

    # Scan the image into a bird's-eye view; returns a numpy array of the pixels.
    scanner = Scanner()
    scan_np = scanner.scan(image_bytes)

    # Extract the individual answers from the scanned test.
    extractor = Extract()
    answers = extractor.get_all_answers(scan_np, 5)

    color = Color()
    bw_answers = color.all_ans_to_bw(answers)

    size = Size()
    DIM = (28, 28)
    shrunk_images = size.shrink_images(bw_answers, DIM)

    # Convert the answer images to the single-array shape used in training:
    # each image comes back as (1, 28, 28, 1) and as type float.
    answers_flat = file_converter.convert_images(shrunk_images)

    # With the answers as black-and-white arrays, load the trained model and grade them.
    model_loader = ModelLoader()
    MODEL_JSON = 'models/modified_model_98.json'
    MODEL_WEIGHTS = 'models/modified_model_98.h5'
    model = model_loader.load_model_2(MODEL_JSON, MODEL_WEIGHTS)

    # Compile the model before running predictions.
    model.compile(optimizer=RMSprop(lr=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    grader = Grader()
    answers = grader.get_answers(answers_flat, model)

    # Serialize the 784-value (28x28) images so they can be stored in a database.
    ans_strings = file_converter.get_string_images(answers_flat)
    compressed_images = file_converter.compress_images(ans_strings)

    # Optionally insert the images into the database to grow a dataset of handwritten answers.
    # storage = Storage()
    # storage.insert(answers, compressed_images)

    return answers
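# Hypothetical driver for process_image(); the GraderService class name and the
# sample file path are assumptions used only for illustration, and the handler
# is assumed to accept the raw PNG bytes read from disk.
with open('samples/scanned_test.png', 'rb') as fh:
    png_bytes = fh.read()

service = GraderService()
graded = service.process_image(png_bytes)
print(graded)  # one predicted answer per extracted answer box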
def default(self, url=None, fields=None):
    if not url or not fields:
        return dumps({'status': 'error', 'message': 'Need URI and fields'})
    extract = Extract(url, datapath, cachepath, extractorpath)
    data = extract.run()
    chart = GoogleChart(data)

    # Split the comma-separated field list and drop empty entries.
    field_arr = fields.split(",")
    nfield_arr = []
    for field in field_arr:
        field = field.strip()
        if field != '':
            nfield_arr.append(field)

    if len(nfield_arr) == 1:
        url = chart.get_graph_url_for_field(nfield_arr[0])
    else:
        url = chart.get_graph_url_for_fields(nfield_arr)
    return dumps({'status': 'ok', 'url': url})
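# Standalone check of the field-cleaning loop above; pure Python, no external
# dependencies, equivalent to a strip-and-filter list comprehension.
fields = "open, close, ,volume"
nfield_arr = [f.strip() for f in fields.split(",") if f.strip() != '']
assert nfield_arr == ['open', 'close', 'volume']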
class Transform:
    def __init__(self, name):
        self.name = name
        self.extract = Extract(name)

    def get_sentiment(self, text):
        # Label non-positive polarity (including neutral) as Negative.
        sentimentBlob = TextBlob(text)
        if sentimentBlob.sentiment.polarity > 0:
            return 'Positive'
        else:
            return 'Negative'

    def get_stock_data(self):
        data = self.extract.get_stockprice_extract()
        stock_data = data.json()
        return stock_data

    def get_ner_dict(self, description):
        # Collect the named entities spaCy finds in the description.
        ner_list = []
        spacy_nlp = en_core_web_sm.load()
        document = spacy_nlp(description)
        for element in document.ents:
            ner_list.append(str(element))
        return ner_list

    def get_news_data(self):
        # Annotate every article that has a description with sentiment and NER tags.
        data = self.extract.get_news_extract()
        for article in data['articles']:
            if article['description'] is not None:
                article['sentiment'] = self.get_sentiment(article['description'])
                article['ner_tags'] = self.get_ner_dict(article['description'])
            else:
                article['sentiment'] = 'None'
                article['ner_tags'] = 'None'
        return data
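# A minimal usage sketch, assuming Extract(name) takes a ticker or feed name and
# that get_news_extract() returns a News-API-style payload with an 'articles'
# list; the 'AAPL' argument is an assumption for illustration.
transform = Transform('AAPL')
news = transform.get_news_data()
for article in news['articles'][:3]:
    print(article['sentiment'], article['ner_tags'])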
class Driver:
    if __name__ == '__main__':
        # Extract data from the 5 Excel source files.
        extract = Extract()
        ds, d, os, sa, ea = extract.getAllData()

        # Transform the data to derive additional columns.
        transform = Transform()
        ds, d = transform.transformAllData(ds, d)

        # Drop unnecessary columns.
        dropColumns = DropColumns()
        drivingSearch, delivery, orders, startAddresses, endAddresses = dropColumns.dropAllColumns(ds, d, os, sa, ea)

        # Join the 5 tables.
        merge = Merge()
        finalData = merge.mergeAllTables(drivingSearch, delivery, orders, startAddresses, endAddresses)

        # Convert the date columns from object dtype to datetime.
        finalData = transform.transformdate(finalData)

        # Load the merged data into the staging table.
        sqlload = SQL_Load()
        sqlload.loadDataToStaging(finalData)
def MakeMsg():
    global LastDeath
    global soup
    chdif = CheckNews()
    deathOfLapsus = soup.find_all(class_='page block celebrity blockList')[:chdif]
    listeOfDeathOfLapsus = []
    for death in deathOfLapsus:
        # Pull the name, life details, text and cause of death out of each entry.
        tempsys = {}
        bsfourElementTag = Extract(death)
        tempsys['name'] = bsfourElementTag.XtractName()
        tempsys['info'] = bsfourElementTag.XtractAreaLife()
        tempsys['text'] = bsfourElementTag.XtractText()
        tempsys['tombstone'] = bsfourElementTag.XtractDeathCause()
        listeOfDeathOfLapsus.append(tempsys)
        # Remember the most recently processed entry by its image title.
        LastDeath = death.a.find('img', title=True)['title']
    return listeOfDeathOfLapsus
'Content-Type': "application/x-www-form-urlencoded", 'Cookie': cookie, 'Host': "zxgk.court.gov.cn", 'Origin': "http://zxgk.court.gov.cn", 'Referer': "http://zxgk.court.gov.cn/shixin/index_form.do", 'Upgrade-Insecure-Requests': "1", 'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36", } data = { 'pName': info, 'pCardNum': '', 'pProvince': 0, 'pCode': pCode, 'captchaId': captchaId, } response = requests.post(findUrl, headers=headers, data=data) if_continue = Extract(response.text) cookie = Cook(cookie, response.cookies.get_dict()) cookie = Rep(cookie) plt.close() print('Close captcha')
def default(self, url=None):
    if not url:
        return dumps({'status': 'error', 'message': 'Need URL'})
    extract = Extract(url, datapath, cachepath, extractorpath)
    data = extract.run()
    return dumps({'status': 'ok', 'data': data})
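# These default handlers look like CherryPy-style dispatchers that return JSON.
# A minimal sketch of how such an endpoint might be mounted, assuming CherryPy,
# that dumps is json.dumps, and with Extract.run() stubbed out; the Api class
# and the /extract mount point are hypothetical.
import json
import cherrypy

class Api:
    @cherrypy.expose
    def default(self, url=None):
        if not url:
            return json.dumps({'status': 'error', 'message': 'Need URL'})
        return json.dumps({'status': 'ok', 'data': {'url': url}})  # stand-in for Extract(url, ...).run()

if __name__ == '__main__':
    # GET /extract/?url=http://example.com returns the JSON payload above.
    cherrypy.quickstart(Api(), '/extract')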
            else:
                break

        logger.debug("Scraping Module : Total Links for : " + str(keyword)
                     + " is : " + str(len(current_advertisment_links)))
        # Get the search-box Selenium element and clear its text before entering the next keyword.
        search_box = wait.until(EC.element_to_be_clickable((By.ID, 'SearchKeyword')))
        time.sleep(2)
        search_box.clear()
else:
    browser.close()
    browser.quit()

logger.debug("Scraping Module : Starting Data Scraping")
extract = Extract()
logger.debug("Scraping Module : Final Processing For All Advertisements In Progress")
current_timestamp = extract.extract_data(browser, advertisment_links, finalTimestamp_in_property,
                                         HandleProperties(), logger, province_name, city_name)

updated_date = (date.today() - timedelta(days=1)).strftime("%d/%m/%Y")
location_dictionary = openFile("r", "")
province_dictionary = location_dictionary["province_dict"]
city_dictionary = location_dictionary["city_dict"]
province_name = province_dictionary.get(province_argument)
cities_json = city_dictionary.get(province_argument)
city_json = cities_json.get(city_argument)
city_name = city_json.get("name")

if type_argument == "w":
    search_type = "Wanted"
import numpy as np
import sys, os

if len(sys.argv) != 3:
    print('Usage: python train_hog_human_detectory.py [database location] [mode]')
    print('mode: 0 - train, 1 - test')
    sys.exit(0)

if not os.path.isdir(sys.argv[1]):
    print('Directory does not exist:', sys.argv[1])
    sys.exit(0)

db = LoadDataSet(sys.argv[1])
cut = CutPicture(64.0, 128.0, 3)
hog = HOG(9, (6, 6), (3, 3))
ex = Extract(hog, cut)

if int(sys.argv[2]) == 0:
    print('Generating features...')
    pos, neg = db.loadTrainSet()
    lPos = 1178
    lNeg = 1359
    # X, y = ex.getSamples(pos, neg, lPos, lNeg)
    X, y = ex.getSamples(pos, neg)
    X = np.array(X).astype('float32')
    y = np.array(y).astype('float32')
def __init__(self, url: str, parameters: dict):
    super().__init__(url, parameters)
    # Collect every <script> tag found on the page via the Extract base class.
    self.scripts = self.get_from_tag('script')
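# A minimal usage sketch, assuming the __init__ above belongs to an Extract
# subclass (named ScriptExtract here purely for illustration) and that
# get_from_tag() returns the matching tags; the URL is illustrative only.
page = ScriptExtract('https://example.com', {'timeout': 10})
for script_tag in page.scripts:
    print(script_tag)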