def __init__(self, dataSource, dataSet):
    """Load *dataSet* via an Extract helper, picking the loader by *dataSource*.

    dataSource: 'api' or 'csv'; any other value is treated as a database
    source. The loaded result is stored on ``self.data``.
    """
    extractor = Extract()
    if dataSource == 'api':
        loaded = extractor.getAPISData(dataSet)
    elif dataSource == 'csv':
        loaded = extractor.getCSVData(dataSet)
    else:
        loaded = extractor.databases(dataSet)
    self.data = loaded
def process_image(self, image):
    """Grade a photographed answer sheet and return the predicted answers.

    Pipeline: normalize the upload to JPEG bytes, deskew/scan it into a
    top-down view, isolate the individual answer cells, binarize and
    shrink them to the model's 28x28 input, then classify each cell with
    the pre-trained CNN.
    """
    converter = FileConverter()
    # Normalize the image to a JPEG byte string.
    jpeg_bytes = converter.png_to_jpeg(image)

    # Deskew the sheet into a bird's-eye view; result is a pixel ndarray.
    scanned = Scanner().scan(jpeg_bytes)

    # Pull the five individual answer regions out of the scanned sheet.
    answer_imgs = Extract().get_all_answers(scanned, 5)

    # Binarize the regions, then downscale each to the model input size.
    bw_imgs = Color().all_ans_to_bw(answer_imgs)
    DIM = (28, 28)
    small_imgs = Size().shrink_images(bw_imgs, DIM)

    # Flatten to the (1, 28, 28, 1) float layout the model was trained on.
    flat_imgs = converter.convert_images(small_imgs)

    # Load the trained classifier from its serialized architecture/weights.
    MODEL_JSON = 'models/modified_model_98.json'
    MODEL_WEIGHTS = 'models/modified_model_98.h5'
    model = ModelLoader().load_model_2(MODEL_JSON, MODEL_WEIGHTS)
    model.compile(optimizer=RMSprop(lr=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Run every answer image through the model and collect the grades.
    graded = Grader().get_answers(flat_imgs, model)

    # Serialize the 784-pixel (28x28) images for potential database storage
    # (building up a handwritten-letter dataset).
    img_strings = converter.get_string_images(flat_imgs)
    compressed_images = converter.compress_images(img_strings)
    # storage = Storage()
    # storage.insert(graded, compressed_images)
    return graded
def MakeMsg():
    """Collect the newly-published obituary entries as a list of dicts.

    Reads the module-level ``soup`` (parsed page) and updates the
    module-level ``LastDeath`` marker to the newest entry processed.
    ``CheckNews()`` supplies how many entries are new since the last run.
    """
    global LastDeath
    global soup
    new_count = CheckNews()
    fresh_entries = soup.find_all(class_='page block celebrity blockList')[:new_count]
    collected = []
    for entry in fresh_entries:
        parser = Extract(entry)
        collected.append({
            'name': parser.XtractName(),
            'info': parser.XtractAreaLife(),
            'text': parser.XtractText(),
            'tombstone': parser.XtractDeathCause(),
        })
        # Track the newest entry so the next CheckNews() knows where we stopped.
        LastDeath = entry.a.find('img', title=True)['title']
    return collected
class Driver:
    """ETL driver: extract five excel files, transform, merge, and load to staging.

    NOTE(review): the entry-point guard lives inside the class body, so the
    pipeline runs while the class is being defined (only when the module is
    executed as a script) and each assigned name becomes a class attribute.
    Kept as-is to preserve behavior.
    """

    if __name__ == '__main__':
        # Extracting data from 5 excel files.
        extract = Extract()
        ds, d, os, sa, ea = extract.getAllData()

        # Transforming data to obtain additional columns.
        transform = Transform()
        ds, d = transform.transformAllData(ds, d)

        # Dropping unnecessary columns.
        dropColumns = DropColumns()
        drivingSearch, delivery, orders, startAddresses, endAddresses = (
            dropColumns.dropAllColumns(ds, d, os, sa, ea))

        # Joining the 5 tables into a single dataset.
        merge = Merge()
        finalData = merge.mergeAllTables(
            drivingSearch, delivery, orders, startAddresses, endAddresses)

        # Converting the date columns from object dtype to datetime.
        finalData = transform.transformdate(finalData)

        # Pushing the merged result into the staging tables.
        sqlload = SQL_Load()
        sqlload.loadDataToStaging(finalData)
'Content-Type': "application/x-www-form-urlencoded", 'Cookie': cookie, 'Host': "zxgk.court.gov.cn", 'Origin': "http://zxgk.court.gov.cn", 'Referer': "http://zxgk.court.gov.cn/shixin/index_form.do", 'Upgrade-Insecure-Requests': "1", 'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36", } data = { 'pName': info, 'pCardNum': '', 'pProvince': 0, 'pCode': pCode, 'captchaId': captchaId, } response = requests.post(findUrl, headers=headers, data=data) if_continue = Extract(response.text) cookie = Cook(cookie, response.cookies.get_dict()) cookie = Rep(cookie) plt.close() print('Close captcha')
else: break logger.debug( "Scraping Module : Total Links for : " + str(keyword) + " is : " + str(len(current_advertisment_links))) # Getting Search Box Selenium Element to Clear its Text before Inputting Next Text search_box = wait.until(EC.element_to_be_clickable((By.ID, 'SearchKeyword'))) time.sleep(2) search_box.clear() else: browser.close() browser.quit() logger.debug("Scraping Module : Starting Data Scraping") extract = Extract() logger.debug("Scraping Module : Final Processing For All Advertisements In Progress") current_timestamp = extract.extract_data(browser, advertisment_links, finalTimestamp_in_property, HandleProperties(), logger, province_name, city_name) updated_date = (date.today() - timedelta(days=1)).strftime("%d/%m/%Y") location_dictionary = openFile("r", "") province_dictionary = location_dictionary["province_dict"] city_dictionary = location_dictionary["city_dict"] province_name = province_dictionary.get(province_argument) cities_json = city_dictionary.get(province_argument) city_json = cities_json.get(city_argument) city_name = city_json.get("name") if type_argument == "w": search_type = "Wanted"
import numpy as np import sys, os if len(sys.argv) != 3: print 'Usage: python train_hog_human_detectory.py [lokacija baze] [mode]' print 'mode: 0 - train, 1 - test' sys.exit(0) if os.path.isdir(sys.argv[1]) == False: print 'Ne postoji direktorij', sys.argv[1] sys.exit(0) db = LoadDataSet(sys.argv[1]) cut = CutPicture(64.0, 128.0, 3) hog = HOG(9, (6,6), (3,3)) ex = Extract(hog, cut) if int(sys.argv[ 2 ]) == 0: print 'Generiranje znacajki...' pos, neg = db.loadTrainSet() lPos = 1178 lNeg = 1359 #X, y = ex.getSamples(pos, neg, lPos, lNeg) X, y = ex.getSamples(pos, neg) X = np.array(X).astype('float32') y = np.array(y).astype('float32')
def __init__(self, name):
    """Store *name* and build the Extract helper bound to it."""
    self.extract = Extract(name)
    self.name = name