def main(args):
    print("Get cooc of each doc from corpus")
    cooc_model = Processing()

    # savepath = "sample_data/"
    savepath = args.savepath
    coocpath = savepath + 'cooc/'

    filepath = args.filepath

    if not os.path.isdir(coocpath):
        os.makedirs(coocpath)

    if args.data_type == 'csv':
        df = pd.read_csv(filepath)
    else:
        df = pd.read_csv(filepath, sep='\t')

    print("Creation Finished.. Starts new job")
    print(" ")

    print("Make a graph")

    cooc_path_list = get_cooc_filenames(coocpath)
    feature_model = Feature(doc_path_list=cooc_path_list, dataframe=df)

    print("Make all features and load all to dataframe ")
    df = feature_model.make_df_from_dataset()

    df.to_csv(savepath + 'result_0~10.csv') # change name
    print("Completed")
Example #2
def compute_feature_vec(orders):
    # Build monomial features of the requested degrees (0-3) over the
    # n_dimensions_x input dimensions (a module-level global).
    feature_vec = []

    for element in orders:
        if element > 3 or element < 0:
            raise ValueError('Can only create monomials of degree 0 to 3')

    # constant
    if 0 in orders:
        feature_vec = [Feature(np.array([]), 'multiply')]

    # first order monomials: linear (additional features: 15, total 16)

    if 1 in orders:
        for i in range(n_dimensions_x):
            feature_vec.append(Feature(np.array([i]), 'multiply'))

    if 2 in orders:
        # second order monomials: quadratic (additional features: 15*15 = 225, total 241)
        for i in range(n_dimensions_x):
            for j in range(n_dimensions_x):
                feature_vec.append(Feature(np.array([i, j]), 'multiply'))

    if 3 in orders:
        for i in range(n_dimensions_x):
            for j in range(n_dimensions_x):
                for k in range(n_dimensions_x):
                    feature_vec.append(Feature(np.array([i, j, k]),
                                               'multiply'))

    return feature_vec
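
A minimal usage sketch (assuming the Feature class from these snippets is in scope, and n_dimensions_x = 15 as the comments above suggest): orders [0, 1, 2] yield 1 + 15 + 225 = 241 features.

n_dimensions_x = 15
feature_vec = compute_feature_vec([0, 1, 2])
assert len(feature_vec) == 1 + 15 + 15 * 15  # 241 features total
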
Example #3
 def __init__(self):
     self.threshold_score = 1.0
     self.size_threshold = 20
     self.k = 57
     self.top_10_cliques = './top_cliques/'
     self.adj_list = Graph().read_adjacency_list()
     self.feature_vector = Feature().read_features()
     self.topk_features = Feature().read_topk_features(self.k)
Example #4
def main():
    # ================== setup myo-python (do not change) =====================
    myo.init(sdk_path='../../myo_sdk')  # Compile Python binding to Myo's API
    hub = myo.Hub()  # Create a Python instance of MYO API
    if not ConnectionChecker().ok:  # Check connection before starting acquisition
        quit()
    # =========================================================================
    # Setup our custom processor of MYO's events.
    # The Buffer listener acquires new data into a queue; at a sampling rate
    # of 200 Hz, 512 samples correspond to ~2.5 seconds of the most recent data.
    listener = Buffer(buffer_len=512)
    # Feature object used to calculate the Mean Absolute Value (MAV):
    calculate = Feature(input_len=512)
    # Setup multichannel plotter for visualisation: the MYO armband has
    # 8 EMG channels; the plot window spans the 512-sample buffer.
    plotter = MultichannelPlot(nchan=8, xlen=512)
    freq = 200
    move = cursor(freq)

    # Tell MYO API to start a parallel thread that will collect the data and
    # command the MYO to start sending EMG data.
    with hub.run_in_background(listener):  # Associate our listener with the MYO API.
        print('Streaming EMG ... Press shift-c to stop.')
        while hub.running:
            time.sleep(0.040)
            # Pull recent EMG data from the buffer
            emg_data = listener.get_emg_data()
            # Transform it to numpy matrix
            emg_data = np.array([x[1] for x in emg_data])

            # guard against an empty or partially filled buffer
            if emg_data.ndim == 2 and emg_data.shape[0] == 512:
                # calculate MAV of the EMG window
                mav_data = calculate.MAV(emg_data)
                mav_data = np.array(mav_data.T)

                plotter.update_plot(mav_data)

                move.move_cursor(mav_data)

            if keyboard.is_pressed('C'):
                print('Stop.')
                break
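
The MAV in the loop above reduces each 512x8 EMG window to one value per channel. A minimal sketch of that reduction (an assumption about what Feature.MAV returns, not its actual implementation):

import numpy as np

def mav(emg_window):
    # emg_window: (n_samples, n_channels) -> mean absolute value per channel
    return np.mean(np.abs(emg_window), axis=0)

mav(np.random.randn(512, 8)).shape  # (8,)
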
Example #5
 def add_feature(self, id_, time=None):
     if self.has_feature(id_):
         return
     try:
         feature = Feature(id_, 'anatomy_features')
     except KeyError:
         pass
     else:
         if feature.slot is not None:
             self.remove_feature_by_slot(feature.slot)
         self.features.append(feature)
Example #6
def create(path, total, index):
    list = []
    counter = 0
    '''
    # sanity check that duplicate detection works
    zero = Feature("(1*2+3)*4","5*4")
    one = Feature("(2*1+3)*4","5*4")
    list.append(zero)
    if check(one, list):
        list.append(one)
    else:
        counter +=1
    '''
    '''
    # first pass: generate the problems once, skipping duplicates
    for i in range(total):
        string,symble = que_creation(index)
        a = Feature(string,symble)
        if check(a,list) :
            list.append(a)
        else:
            counter +=1
            continue
    print("counter:%d" %counter)
    
    # top up the missing problems (flawed; this approach was abandoned)
    for i in range(counter):
        string, symble = que_creation(index)
        a = Feature(string, symble)
        if check(a, list):
            list.append(a)
        else:
            counter += 1
            continue
    '''
    # keep generating until enough unique problems exist
    while (counter < total):
        string, symble = que_creation(index)
        a = Feature(string, symble)
        if check(a, list):
            list.append(a)
            counter += 1
        else:
            continue

    with open(path, 'w', encoding='utf-8') as x:
        line = 0
        for i in list:
            line += 1
            x.write(str(line) + '. ' + i.problem + '\n')
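
A hypothetical invocation of the function above, assuming que_creation() and check() are provided by the surrounding project:

create('problems.txt', total=30, index=10)  # writes 30 unique, numbered problems
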
Example #7
def create(path, total, index):
    list = []
    counter = 0
    '''
    zero = Feature("(1*2+3)*4","5*4")
    one = Feature("(2*1+3)*4","5*4")
    list.append(zero)
    if check(one, list):
        list.append(one)
    else:
        counter +=1
    '''
    for i in range(total):
        string, symble = que_creation(index)
        a = Feature(string, symble)
        if check(a, list):
            list.append(a)
        else:
            counter += 1
            continue
    print("counter:%d" % counter)


    for i in range(counter):
        string, symble = que_creation(index)
        a = Feature(string, symble)
        if check(a, list):
            list.append(a)
        else:
            counter += 1
            continue

    with open(path, 'w', encoding='utf-8') as x:
        line = 0
        for i in list:
            line += 1
            x.write(str(line) + '. ' + i.problem + '\n')
Example #8
    def merge_characteristics(self, name, merge_threshold):  # returns whether one or more characteristics were merged (boolean)
        logging.debug("Merging characteristics")
        characteristics = self.db.characteristics.find({"name": name})
        meta = Meta(name, self.db)

        chars = list(characteristics)

        # find "best fit" (other characteristic with minimal distance) for each characteristic
        best_fits = [None] * len(chars)  # list of tuples: (first index, second index, distance)
        best_distance = 1.
        for first_i, first in enumerate(chars):
            for second_i, second in enumerate(chars):
                if (first['_id'] != second['_id']) and (idents_disjoint(first['ident'], second['ident'])):
                    distance = Feature.from_db(first).distance_to(Feature.from_db(second), meta.get_attr_ranges())
                    if (not best_fits[first_i]) or (distance < best_fits[first_i][2]):
                        best_fits[first_i] = (first_i, second_i, distance)
                        if distance < best_distance: best_distance = distance

        if best_distance > (1 - merge_threshold):
            return False  # signal that no characteristics needed to be merged
        
        for bf in best_fits:
            if not bf: continue # continue if first has been merged before
            if not best_fits[bf[1]]: continue # continue if second has been merged before

            first = Feature.from_db(chars[bf[0]])
            second = Feature.from_db(chars[bf[1]])

            first.merge(second)
            self.db.characteristics.save(first.db_entry())
            self.db.characteristics.remove({"_id": second._id})
            best_fits[bf[0]] = None
            best_fits[bf[1]] = None

            # TODO recalculate best_fits with same bf[0]

        return True  # signal that one or more characteristics were merged
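
Note the threshold arithmetic above: distances lie in [0, 1], and the merge pass runs only when the closest pair is within 1 - merge_threshold; with merge_threshold = 0.9 that means a distance of at most 0.1.
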
Example #9
    def _load_feature(self, name, type_name, virtual=False, virtual_function_code=None):

        if name in self._features:  # feature already exists
            feature = self.get_feature(name)
            feature.virtual_function_code = virtual_function_code
            self.get_datastore().map(name, feature.format_function)  # force value types
            feature._refresh()

        else:
            feature = Feature.create_feature(self, name, type_name, virtual)
            self.set_feature(name, feature)
            feature.seq_order = self._get_next_seq_order()
            feature.virtual_function_code = virtual_function_code
            self.get_datastore().map(name, feature.format_function)  # force value types
            feature._discover()

        return feature
Example #10
    def parseFeaturedElement(self, element, stream):
        """Parses an Element (Terminal or Nonterminal) with attached features.
           We do this by first doing a normal parse of the target element and
           then trying to unify its semantic value with the attached features.
           If they unify, we return the unified value as the semantic value;
           if they don't unify, we fail the parse."""
        target = element.target
        if issubclass(type(target), Terminal):
            parser = self.parseTerminal
        elif issubclass(type(target), Nonterminal):
            parser = self.parseNonterminal
        else:
            raise ValueError("Features can only be attached to Terminals and Nonterminals")

        for aparse in parser(target, stream):
            # Try to unify the attached features with this parse's semantic value
            unified = Feature.unify(element.features, aparse)
            print("Unifying", element.features, aparse)
            if unified:
                yield unified
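
As a toy illustration of the unification step (a dict-based sketch, not this codebase's Feature.unify): two feature structures unify iff they agree on every shared key.

def toy_unify(a, b):
    merged = dict(a)
    for key, value in b.items():
        if key in merged and merged[key] != value:
            return None  # conflicting values: the parse fails
        merged[key] = value
    return merged

toy_unify({'num': 'sg'}, {'num': 'sg', 'per': 3})  # {'num': 'sg', 'per': 3}
toy_unify({'num': 'sg'}, {'num': 'pl'})            # None
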
Example #11
 def add_feature(self, id_):
     self.features.append(Feature(id_, self.features_data_dict))
Example #12
import sys
from features import Feature

if __name__ == '__main__':    
    ui = Feature()
    
Example #13
def main(args):
    print("Get cooc of each doc from corpus")
    cooc_model = Processing()

    savepath = "sample_data/"
    coocpath = savepath + 'cooc/'

    filepath = args.filepath

    if not os.path.isdir(coocpath):
        os.makedirs(coocpath)

    if args.data_type in ('csv', 'tsv'):
        if args.data_type == 'csv':
            df = pd.read_csv(filepath)  # note: filepath is currently a dir here
        else:
            df = pd.read_csv(filepath, sep='\t')

        with tqdm(total=len(df['text'][18087:18200])) as pbar:  # change index here
            no_processed_idx = []
            f = open(savepath + "no_processed_index.txt", 'a', encoding='utf-8')
            f.write("Not process index:\n")
            for idx, text in enumerate(df['text'][18087:18200]):  # change index here
                try:
                    pbar.update(1)
                    cooc_model.cooc(text=text,
                                    savepath="{0}/{1}.csv".format(coocpath, idx + 18087))
                except Exception as e:
                    f.write("{}, index:{}\n".format(e, idx + 18087))

        f.close()
        print(" ")
        print("Creation Finished.. Starts new job")
        print(" ")

        print("Make a graph")

        feature_model = Feature(doc_path_list=coocpath, dataframe=df)

        print("Make all features and load all to dataframe ")
        df = feature_model.make_df_from_dataset()

        df.to_csv(savepath + 'result.csv')
        print("Completed")

    elif args.data_type in ('txt', 'text'):
        path_fake = savepath + '/data/fake'
        path_true = savepath + '/data/true'

        doc_path_list_f = get_doc_filenames(path_fake)
        doc_path_list_t = get_doc_filenames(path_true)

        doc_label = [0] * len(doc_path_list_f) + [1] * len(doc_path_list_t)

        df = pd.DataFrame(doc_label, columns=['label'])

        with tqdm(total=len(doc_path_list_f),
                  desc="co-occurrence matrix creation - fake news") as pbar:
            for idx, doc_path in enumerate(doc_path_list_f):
                pbar.update(1)
                cooc_model.cooc(filepath=doc_path,
                                savepath="{0}/{1}.csv".format(path_fake, idx))

        with tqdm(total=len(doc_path_list_t),
                  desc="co-occurrence matrix creation - true news") as pbar:
            for idx, doc_path in enumerate(doc_path_list_t):
                pbar.update(1)
                cooc_model.cooc(filepath=doc_path,
                                savepath="{0}/{1}.csv".format(path_true, idx))

        print(" ")
        print("Creation Finished.. Starts new job")
        print(" ")

        print("Make a graph")
        cooc_f_list = get_cooc_filenames(document_path=path_fake)
        cooc_t_list = get_cooc_filenames(document_path=path_true)
        cooc_path_list = cooc_f_list + cooc_t_list

        feature_model = Feature(doc_path_list=cooc_path_list, dataframe=df)

        print("Make all features and load all to dataframe ")
        df = feature_model.make_df_from_dataset()

        df.to_csv(savepath + '/data/' + 'result.csv')
        print("Completed")
Example #14
class ExtendFeatureClique:
    '''
    Takes a clique and extends it iteratively, adding vertices according
    to a best-feature heuristic, until the score exceeds the threshold.
    '''
    def __init__(self):
        self.threshold_score = 1.0
        self.size_threshold = 20
        self.k = 57
        self.top_10_cliques = './top_cliques/'
        self.adj_list = Graph().read_adjacency_list()
        self.feature_vector = Feature().read_features()
        self.topk_features = Feature().read_topk_features(self.k)

    def extend(self, clique, index):
        # @param clique, list of nodes in the current clique
        # @param nodes, list of nodes
        # @return maximal clique above threshold
        clique_size = len(clique)
        degree_num = clique_size * (clique_size - 1.0)
        nodes = self.adj_list[index][:]
        nodes[-1] = nodes[-1].strip('\n')
        clique[-1] = clique[-1].strip('\n')
        for vertex in clique:
            if vertex in nodes:
                nodes.remove(vertex)
        while True:
            if len(nodes) == 0:
                break
            d = {}
            nums = {}
            max_benefit = 0.0
            for node in nodes:
                increment = 0
                score = 0.0
                max_score = sum(self.topk_features.values()) * clique_size * 1.0
                for vertex in clique:
                    for key, value in self.topk_features.items():
                        if (key in self.feature_vector[node]
                                and key in self.feature_vector[vertex]):
                            if self.feature_vector[node][key] == self.feature_vector[vertex][key]:
                                score += value
                    if node in self.adj_list[vertex]:
                        increment += 1
                d[node] = (score * 1.0)
                nums[node] = 2 * increment
                max_benefit = max(max_benefit, d[node])

            for key, value in d.items():
                if value == max_benefit:
                    degree_num += nums[key]
                    clique_size += 1

            degree_score = (degree_num * 1.0) / (clique_size * (clique_size - 1.0))

            if clique_size > self.size_threshold:
                break
            else:
                for key, value in d.items():
                    if value == max_benefit:
                        clique.append(key)
                        nodes.remove(key)
        return clique
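
A hypothetical usage sketch, assuming the adjacency list, feature files, and node-id conventions of this project are in place:

extender = ExtendFeatureClique()
seed_clique = ['12', '45', '78']  # node ids, as read from the clique files
extended = extender.extend(seed_clique, index=12)
print(len(extended), 'nodes after extension')
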
Example #15
                                                      n_dimensions_x)

# Feature transform
transform = True
constant = True
first = True
second = True
third = True
exponential = False

feature_vec = []

if transform:
    # constant
    if constant:
        feature_vec = [Feature(np.array([]), 'multiply')]

    # first order monomials: linear (additional features: 15, total 16)

    if first:
        for i in range(n_dimensions_x):
            feature_vec.append(Feature(np.array([i]), 'multiply'))

    if second:
        # second order monomials: quadratic (additional features: 15*15 = 225, total 241)
        for i in range(n_dimensions_x):
            for j in range(n_dimensions_x):
                feature_vec.append(Feature(np.array([i, j]), 'multiply'))

    if third:
        for i in range(n_dimensions_x):
Example #16
 def __init__(self, basis):
     self.features = []
     self.basis = Feature(basis, 'anatomy_features')
     self.add_feature(self.basis)
     self.wetness = 0
     self.stretch = 0
Example #17
def feature_extraction(document):
    feat = Feature(document.filename).execute(hop_length=512)
    return feat.df