Example #1
    def load_data(data_type, clear_data, data_sample=False, n_samples=0, n_samples_for_class=False, name='Automotive'):
        data = []
        with open(AutomotiveDataset.get_dataset_file(['automotive', "classify_auto.txt"]), "r", encoding="utf8") as f:
            lines = f.readlines()
            for line in lines:
                if "\t" in line:
                    sequence, label = line.replace('\n', '').split('\t')
                    # change categorical value to numeric
                    if label == "automobile_industry":
                        label = 1
                    else:
                        label = 0

                    if sequence is not None and label is not None:
                        if clear_data:
                            sequence = Preprocessing.deleteUrls(sequence)
                            sequence = Preprocessing.deleteHtmlEntities(sequence)
                            sequence = Preprocessing.deleteHtmlTags(sequence)
                            sequence = Preprocessing.deletePunctuation(sequence)
                            sequence = Preprocessing.deleteMultipleSpaces(sequence)
                        data.append((sequence.strip(), label))

        if data_sample:
            data = AutomotiveDataset.sampled_data(data, n_samples, n_samples_for_class)

        if data_type == Type.text:
            X, y = AutomotiveDataset.get_text_data(data)
            return Dataset(X, y, name)

        X, y = get_X_y(data)
        return Dataset(X, y, name)
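
A minimal usage sketch for the loader above. The indentation and the AutomotiveDataset.* calls suggest load_data is a static method of AutomotiveDataset, and Type appears to be an enum with a text member; the module path, the Dataset attribute names, and the meaning of n_samples_for_class are assumptions, not taken from the snippet.

# Hypothetical usage; module path and Dataset attributes are assumptions
from automotive_dataset import AutomotiveDataset, Type

dataset = AutomotiveDataset.load_data(
    data_type=Type.text,       # return raw text sequences rather than feature vectors
    clear_data=True,           # strip URLs, HTML entities/tags, punctuation, extra spaces
    data_sample=True,          # subsample the full corpus
    n_samples=1000,
    n_samples_for_class=True,  # assumed: sample n_samples per class
)
print(dataset.name)            # 'Automotive' (assumes Dataset keeps name as an attribute)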
Example #2
def PreprocessData():
    # Will hold the parsed PubMed article list
    pubmedarticlelists = None

    # Create FileOperations object
    fo = FileOperations()

    # Create a Preprocessing object (used both to parse the XML and to load the cached file)
    p = Preprocessing()

    # If a parsed file already exists, load it; otherwise parse the XML and cache the result
    if fo.exists(GV.parsedDataFile):
        pubmedarticlelists = p.LoadFile(GV.parsedDataFile)

    else:
        # Call the Parse method
        pubmedarticlelists, unsavedpmids = p.parse(GV.inputXmlFile)

        print(len(pubmedarticlelists))
        print(len(unsavedpmids))

        # Save the parsed data to a file
        fo.SaveFile(GV.parsedDataFile, pubmedarticlelists, mode='wb')
        fo.SaveFile(GV.unsavedPmidFile, unsavedpmids, mode='w')

        pubmedarticlelists = p.LoadFile(GV.parsedDataFile)

    del fo

    return pubmedarticlelists
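
The branch above is a simple parse-once cache: the expensive XML parse runs only when GV.parsedDataFile is missing, and its output is saved so later runs load it directly. Re-loading the freshly saved file instead of returning the in-memory result keeps both branches returning the same deserialized object, at the cost of one extra read. A minimal driver sketch, assuming GV already points at the input and cache paths used above:

# Hypothetical driver; GV, FileOperations and Preprocessing come from the project above
pubmedarticlelists = PreprocessData()
print(f"{len(pubmedarticlelists)} parsed PubMed articles available")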
Example #3
        #converting numpy array into image
        image = Image.fromarray(padding)

        #gives the face co-ordinates
        face_coord, _ = mtcnn.detect(image)

        if face_coord is not None:
            for coord in face_coord:
                for x1, y1, x2, y2 in [coord]:
                    x1, y1, x2, y2 = r(x1), r(y1), r(x2), r(y2)

                    #face array
                    face = padding[y1:y2, x1:x2]

                    #Preprocessing
                    preprocess = Preprocessing(img=Image.fromarray(face))
                    #tensor array
                    tensor_img_array = preprocess.preprocessed_arrays()

                    #Predicting
                    prob, label = torch.max(
                        torch.exp(model(tensor_img_array.to(device))), dim=1)

                    scale = round((y2 - y1) * 35 / 100)
                    #mini box
                    cv.rectangle(frame, (x1 - 50, y1 - 50), (x1 - 40, y1 - 40),
                                 color_dict[label.item()], -1)

                    #Bounding box (colour and thickness assumed, matching the marker box above)
                    cv.rectangle(frame, (x1 - 50, y1 - 50), (x2 - 50, y2 - 50),
                                 color_dict[label.item()], 2)
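
The fragment above runs inside a video-capture loop: each padded frame is turned into a PIL image, MTCNN returns face boxes, each face crop is preprocessed into a tensor, and the classifier's highest-probability label selects the box colour. Below is a minimal sketch of the setup it appears to assume; the MTCNN here is the facenet-pytorch detector, and r, color_dict, padding, model and the checkpoint path are guesses at names defined elsewhere in the original script.

# Hypothetical setup sketch for the detection loop above (names and paths are assumptions)
import cv2 as cv
import torch
from facenet_pytorch import MTCNN
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(keep_all=True, device=device)                     # multi-face detection
model = torch.load('mask_classifier.pt', map_location=device)   # hypothetical checkpoint
model.eval()

r = round                                       # assumption: r() rounds box coordinates
color_dict = {0: (0, 0, 255), 1: (0, 255, 0)}   # assumption: label -> BGR colour

cap = cv.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Pad the frame so boxes shifted by -50 px still land inside the image (assumption)
    padding = cv.copyMakeBorder(frame, 50, 50, 50, 50, cv.BORDER_CONSTANT)

    # ... the detection / classification block from Example #3 goes here ...

    cv.imshow('frame', frame)
    if cv.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv.destroyAllWindows()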
Example #4
def GetDocuments(articlelists, title=True, abstract=True, meshwords=False):
    p = Preprocessing()
    docs = p.GetDocuments(articlelists, title=title, abstract=abstract, meshwords=meshwords)
    del p
    return docs
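
Examples #2 and #4 chain together: PreprocessData() yields the parsed article list and GetDocuments() flattens it into plain-text documents built from titles and abstracts. A short end-to-end sketch using only the two functions shown above:

# Hypothetical pipeline combining Examples #2 and #4
pubmedarticlelists = PreprocessData()
docs = GetDocuments(pubmedarticlelists, title=True, abstract=True, meshwords=False)
print(f"Built {len(docs)} documents")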