Example #1
0
def _load_image_sframe(frame):
    """
    Build an SFrame of images from the ``path`` column of a dataframe.

    :param frame: dataframe whose ``path`` column holds image file paths
    :return: SFrame with ``path`` and ``image`` columns, plus the row-number
             column added by ``add_row_number()``
    """
    paths = list(frame['path'])
    images = [tc.Image(path) for path in paths]
    # Construct the SFrame once rather than appending a one-row SFrame per
    # image.
    return SFrame({'path': paths, 'image': images}).add_row_number()


def similor_sort(sourceData, classicData, num):
    """
    Score every source picture by its mean distance to the classic pictures.

    An image-similarity model (``resnet-50``) is built on the source pictures
    and queried with the classic pictures. For each source picture the
    distances reported by the queries are averaged and stored in a
    ``distance`` column of ``sourceData``.

    :param sourceData: dataframe with a ``path`` column of reference pictures;
                       it is modified in place (``distance`` column added or
                       overwritten)
    :param classicData: dataframe with a ``path`` column of classic (query)
                        pictures
    :param num: how many nearest source pictures to retrieve per classic
                picture; ``0`` (or a value larger than the number of source
                pictures) means all of them
    :return: ``sourceData`` with the ``distance`` column. A source picture
             that no query returned among its ``num`` nearest gets ``NaN``.
    :raises ValueError: if ``num`` is negative
    """
    if num < 0:
        raise ValueError("num must be >= 0, got %r" % (num,))

    start_time = time.time()

    ref_data = _load_image_sframe(sourceData)
    query_data = _load_image_sframe(classicData)

    model = tc.image_similarity.create(ref_data,
                                       label=None,
                                       feature=None,
                                       model='resnet-50',
                                       verbose=True)

    n_ref = ref_data.num_rows()
    n_query = query_data.num_rows()
    # There cannot be more neighbours than reference pictures.
    if num == 0 or num > n_ref:
        num = n_ref

    similar_images = model.query(query_data, k=num)

    # One column per reference picture (not per requested neighbour):
    # reference_label indexes the reference rows, so a (n_query, num) array
    # overflows or yields a mean of the wrong length whenever num != n_ref.
    # NaN marks pairs the query did not report.
    distances = np.full((n_query, n_ref), np.nan)
    for match in similar_images:
        distances[match['query_label'], match['reference_label']] = \
            match['distance']

    # Mean over the reported distances only; references never reported keep
    # NaN instead of a misleading 0.
    known = ~np.isnan(distances)
    counts = known.sum(axis=0)
    totals = np.where(known, distances, 0.0).sum(axis=0)
    mean = np.full(n_ref, np.nan)
    np.divide(totals, counts, out=mean, where=counts > 0)

    if 'distance' in sourceData.columns:
        # A repeated call on the same frame refreshes the column instead of
        # failing on a duplicate insert.
        sourceData['distance'] = mean
    else:
        # Keep the column at position 2 when the frame is wide enough.
        position = min(2, len(sourceData.columns))
        sourceData.insert(position, 'distance', mean)

    elapsed_time = time.time() - start_time
    print("Time elapsed = %d" % (elapsed_time))
    return sourceData
    def fields_of_study_papers_ids(self, levels=(1, 2, 3)):
        """
        Build a single SFrame of fields-of-study paper ids for several levels.

        For every requested level the result of
        ``self._create_field_of_study_paper_ids(level)`` is appended to one
        combined SFrame; progress over the levels is shown with ``tqdm``.

        :param levels: iterable of fields-of-study levels to include
        :return: SFrame holding the appended rows of all requested levels
        """
        combined = SFrame()
        for lvl in tqdm(levels):
            level_rows = self._create_field_of_study_paper_ids(lvl)
            combined = combined.append(level_rows)
        return combined
Example #3
0
    def sjr_to_csv(self, regex):
        """
        Load the SJR CSV files matching a pattern into one SFrame.

        Every ``.csv`` file yielded by ``self._dataset_dir.glob(regex)`` is
        read with ``;`` as delimiter, tagged with the four-digit year found in
        its file name (``Year`` column), has its year-specific
        ``Total Docs. (<year>)`` column renamed to ``Total Docs.``, and is
        appended to the combined frame. The ``Issn`` column is then split
        into its 8-digit codes and stacked into an ``ISSN`` column.

        :param regex: pattern passed to ``self._dataset_dir.glob``
                      (presumably a pathlib glob pattern -- confirm)
        :return: SFrame of all matching files with ``Issn`` stacked as ``ISSN``
        """
        issn_pattern = re.compile('(\\d{8})')
        # Columns that some files lack; they are added empty so that all
        # frames can be appended together.
        optional_columns = ["Categories"]

        combined = SFrame()
        for csv_path in self._dataset_dir.glob(regex):
            if csv_path.suffix != ".csv":
                continue

            year_match = re.match(r'.*([1-3][0-9]{3})', csv_path.name)
            year = int(year_match.group(1))

            yearly = SFrame.read_csv(str(csv_path), delimiter=';')
            yearly['Year'] = year
            yearly = yearly.rename({"Total Docs. (%s)" % year: "Total Docs."})
            for column in optional_columns:
                if column not in yearly.column_names():
                    yearly[column] = ''

            combined = combined.append(yearly)

        issn_lists = combined['Issn'].apply(
            lambda raw: issn_pattern.findall(raw))
        combined['Issn'] = issn_lists
        return combined.stack('Issn', new_column_name='ISSN')