def load_data(data_type, clear_data, data_sample=False, n_samples=0,
              n_samples_for_class=False, name='Automotive'):
    """Read the automotive classification file and wrap it in a ``Dataset``.

    The file located via
    ``AutomotiveDataset.get_dataset_file(['automotive', "classify_auto.txt"])``
    is read line by line. Only lines containing a tab are used; each is split
    into ``sequence<TAB>label``. The label ``"automobile_industry"`` becomes
    ``1`` and every other label becomes ``0``.

    Args:
        data_type: Compared against ``Type.text``; when equal, features are
            built with ``AutomotiveDataset.get_text_data``, otherwise with
            ``get_X_y``.
        clear_data: When equal to ``True``, each sequence is passed through
            the ``Preprocessing`` clean-up steps (URLs, HTML entities, HTML
            tags, punctuation, repeated spaces) before being stored.
        data_sample: When equal to ``True``, the collected pairs are replaced
            by ``AutomotiveDataset.sampled_data(...)``.
        n_samples: Forwarded to ``AutomotiveDataset.sampled_data``.
        n_samples_for_class: Forwarded to ``AutomotiveDataset.sampled_data``.
        name: Forwarded as the third argument to ``Dataset``.

    Returns:
        ``Dataset(X, y, name)``.

    Raises:
        ValueError: If a line contains more than one tab, because the split
            result is unpacked into exactly two values.
    """
    source_path = AutomotiveDataset.get_dataset_file(['automotive', "classify_auto.txt"])

    samples = []
    with open(source_path, "r", encoding="utf8") as handle:
        for raw_line in handle:
            # Lines without a tab carry no (sequence, label) pair.
            if "\t" not in raw_line:
                continue

            text, category = raw_line.replace('\n', '').split('\t')

            # Binary target: 1 for the automobile industry class, 0 otherwise.
            target = 1 if category == "automobile_industry" else 0

            # Equality test (not truthiness) is kept on purpose so that only
            # values equal to True switch the cleaning on.
            if clear_data == True:
                for scrub in (
                    Preprocessing.deleteUrls,
                    Preprocessing.deleteHtmlEntities,
                    Preprocessing.deleteHtmlTags,
                    Preprocessing.deletePunctuation,
                    Preprocessing.deleteMultipleSpaces,
                ):
                    text = scrub(text)

            samples.append((text.strip(), target))

    if data_sample == True:
        samples = AutomotiveDataset.sampled_data(samples, n_samples, n_samples_for_class)

    # Pick the feature builder according to the requested representation.
    if data_type == Type.text:
        X, y = AutomotiveDataset.get_text_data(samples)
    else:
        X, y = get_X_y(samples)
    return Dataset(X, y, name)
def PreprocessData():
    """Return the parsed article lists, parsing the input XML only if needed.

    If ``GV.parsedDataFile`` does not exist yet, ``GV.inputXmlFile`` is parsed
    with ``Preprocessing.parse``, the sizes of both returned collections are
    printed, the first is saved to ``GV.parsedDataFile`` (mode ``'wb'``) and
    the second to ``GV.unsavedPmidFile`` (mode ``'w'``). In every case the
    value returned is what ``Preprocessing.LoadFile(GV.parsedDataFile)``
    yields.

    Returns:
        The object loaded from ``GV.parsedDataFile``.
    """
    file_ops = FileOperations()
    preprocessor = Preprocessing()

    if not file_ops.exists(GV.parsedDataFile):
        # No cached parse result: build it from the XML input and persist it.
        parsed_articles, unsaved_pmids = preprocessor.parse(GV.inputXmlFile)
        print(len(parsed_articles))
        print(len(unsaved_pmids))
        file_ops.SaveFile(GV.parsedDataFile, parsed_articles, mode='wb')
        file_ops.SaveFile(GV.unsavedPmidFile, unsaved_pmids, mode='w')

    # Always hand back the on-disk version, whether pre-existing or just saved.
    loaded_articles = preprocessor.LoadFile(GV.parsedDataFile)
    del file_ops
    return loaded_articles
#converting numpy array into image image = Image.fromarray(padding) #gives the face co-ordinates face_coord, _ = mtcnn.detect(image) if face_coord is not None: for coord in face_coord: for x1, y1, x2, y2 in [coord]: x1, y1, x2, y2 = r(x1), r(y1), r(x2), r(y2) #face array face = padding[y1:y2, x1:x2] #Preprocessing preprocess = Preprocessing(img=Image.fromarray(face)) #tensor array tensor_img_array = preprocess.preprocessed_arrays() #Predicting prob, label = torch.max(torch.exp( model(tensor_img_array.to(device))), dim=1) scale = round((y2 - y1) * 35 / 100) #mini box cv.rectangle(frame, (x1 - 50, y1 - 50), (x1 - 40, y1 - 40), color_dict[label.item()], -1) #Bounding box cv.rectangle(frame, (x1 - 50, y1 - 50), (x2 - 50, y2 - 50),
def GetDocuments(articlelists, title=True, abstract=True, meshwords=False):
    """Build documents from ``articlelists`` via ``Preprocessing.GetDocuments``.

    A fresh ``Preprocessing`` instance is created for the call and discarded
    afterwards.

    Args:
        articlelists: Passed through unchanged as the first argument.
        title: Forwarded as the ``title`` keyword.
        abstract: Forwarded as the ``abstract`` keyword.
        meshwords: Forwarded as the ``meshwords`` keyword.

    Returns:
        Whatever ``Preprocessing.GetDocuments`` returns.
    """
    field_flags = {
        "title": title,
        "abstract": abstract,
        "meshwords": meshwords,
    }
    preprocessor = Preprocessing()
    documents = preprocessor.GetDocuments(articlelists, **field_flags)
    del preprocessor
    return documents