    def predict(self, test_input, input_type, test_case_count=25):
        normalize = Normalize()
        if input_type == 'RANDOM_INPUT':
            input_count = 0
            for question in test_input:
                input_count += 1
                question_ = normalize.normalize(question)
                logging.debug('Test Case No.{}: {}'.format(
                    input_count, str(question)))
                logging.debug('-' * (len(question) + 16))
                logging.debug('Predicted Tags: {}'.format(
                    self.tag_predictor(question_)))
            logging.debug('')
        else:
            test_idx = np.random.randint(len(test_input), size=test_case_count)
            logging.debug("Predicted Vs Ground Truth for {} samples".format(
                test_case_count))
            logging.debug('-' * 50)
            logging.debug('')
            input_count = 0
            for idx in test_idx:
                input_count += 1
                test_case = idx
                question = str(X_test[test_case])
                logging.debug('Test Case No.{}: {}'.format(
                    input_count, question))
                logging.debug('-' * 100)
                logging.debug("Question ID:    {}".format(test_case))
                logging.debug('Predicted: ' + str(
                    self.tag_predictor(normalize.normalize_(
                        X_test[test_case]))))
                logging.debug('Ground Truth: ' + str(
                    self._tag_encoder.inverse_transform(
                        np.array([y_test[test_case]]))))
                logging.debug('\n')
Example #2
    def input(self):
        with open('F:\\data\\ml\\2\\page_blocks_test_feature.txt', 'r') as fin:
            lines = fin.readlines()
        row = 0
        for line in lines:
            values = line.strip('\n').split(' ')
            self.matx[row][0:10] = values
            row += 1
        Normalize.normalize(self.matx)

        with open('F:\\data\\ml\\2\\page_blocks_test_label.txt', 'r') as fin:
            lines = fin.readlines()
        row = 0
        for line in lines:
            values = line.strip('\n')
            self.label[row] = values[0]
            row += 1
    def __init__(self,
                 in_size,
                 n_out=None,
                 non_lin='HT',
                 method='cos',
                 aft_nonlin=None,
                 affinity_dict=None,
                 type_layer='regular'):

        super(Graph_Layer_Wrapper, self).__init__()

        n_out = in_size if n_out is None else n_out

        if type_layer == 'regular':
            self.graph_layer = Graph_Layer(in_size,
                                           n_out=n_out,
                                           method=method,
                                           affinity_dict=affinity_dict)
        elif type_layer == 'cooc':
            self.graph_layer = Graph_Layer_Cooc(in_size, n_out=n_out)

        self.aft = None
        if aft_nonlin is not None:
            self.aft = []

            to_pend = aft_nonlin.split('_')
            for tp in to_pend:
                if tp.lower() == 'ht':
                    self.aft.append(nn.Hardtanh())
                elif tp.lower() == 'rl':
                    self.aft.append(nn.ReLU())
                elif tp.lower() == 'l2':
                    self.aft.append(Normalize())
                elif tp.lower() == 'ln':
                    self.aft.append(nn.LayerNorm(n_out))
                elif tp.lower() == 'bn':
                    self.aft.append(
                        nn.BatchNorm1d(n_out,
                                       affine=False,
                                       track_running_stats=False))
                elif tp.lower() == 'sig':
                    self.aft.append(nn.Sigmoid())
                else:
                    error_message = 'aft_nonlin token %s not recognized' % tp
                    raise ValueError(error_message)
            self.aft = nn.Sequential(*self.aft)

        # self.do = nn.Dropout(0.5)
        if non_lin is None:
            self.non_linearity = None
        elif non_lin == 'HT':
            self.non_linearity = nn.Hardtanh()
        elif non_lin.lower() == 'rl':
            self.non_linearity = nn.ReLU()
        else:
            error_message = 'non_lin %s not recognized' % non_lin
            raise ValueError(error_message)
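In the wrapper above, the 'l2' token of aft_nonlin maps to Normalize(), which suggests a per-row L2-normalization module; its definition is not part of these listings. A minimal sketch under that assumption (not the original implementation):

import torch.nn as nn
import torch.nn.functional as F


class Normalize(nn.Module):
    # assumed behaviour: L2-normalize each feature vector (row) of the input
    def forward(self, x):
        return F.normalize(x, p=2, dim=1, eps=1e-12)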
Example #4
    def construct_mn(self, n_layers, n_neurons, alpha=0.1):
        mn_inp = Input(shape=[self.noise_size])
        mn = Normalize()(mn_inp)
        mn = Dense(n_neurons, kernel_initializer='he_normal')(mn)
        for _ in range(1, n_layers):
            mn = Dense(n_neurons, kernel_initializer='he_normal')(mn)
            mn = LeakyReLU(alpha)(mn)
        mn = Model(inputs=mn_inp, outputs=mn)
        return mn
Example #5
    def __init__(self,
                 n_classes,
                 deno,
                 pretrained,
                 in_out=None,
                 graph_size=None,
                 method='cos'):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.deno = deno
        self.graph_size = graph_size

        if in_out is None:
            in_out = [2048, 64, 2048, 64]

        num_layers = len(in_out) - 3
        non_lin = 'HT'

        print('NUM LAYERS', num_layers, in_out)

        self.linear_layer = nn.Linear(in_out[0], in_out[1], bias=False)
        # for param in self.linear_layer.parameters():
        #     param.requires_grad = False
        non_lin = 'HT'

        if pretrained == 'ucf':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0.001_0.001/model_99.pt'
        elif pretrained == 'activitynet':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_100_layer_sizes_2048_64_activitynet/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_50_step_50_0.1_0.001_0.001/model_49.pt'
        elif pretrained == 'random':
            model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0_0.001/model_99.pt'
        else:
            error_message = 'Pretrained option %s not valid' % pretrained
            raise ValueError(error_message)

        model_temp = torch.load(model_file)
        self.linear_layer.weight.data = model_temp.linear.weight.data

        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers):
            self.graph_layers.append(
                Graph_Layer_Wrapper(in_out[num_layer + 2],
                                    n_out=in_out[num_layer + 3],
                                    non_lin=non_lin,
                                    method=method))

        last_layer = []

        last_layer.append(nn.Hardtanh())
        last_layer.append(Normalize())
        last_layer.append(nn.Dropout(0.5))
        last_layer.append(nn.Linear(in_out[-1], n_classes))
        last_layer = nn.Sequential(*last_layer)
        self.last_layer = last_layer
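The aft_nonlin argument parsed in Graph_Layer_Wrapper above is an underscore-joined token string ('ht', 'rl', 'l2', 'ln', 'bn', 'sig'). A hedged usage sketch of that convention; the 2048 -> 64 sizes simply mirror the layers used above and are illustrative, not prescribed:

# hypothetical instantiation of the wrapper defined earlier
wrapper = Graph_Layer_Wrapper(2048,
                              n_out=64,
                              non_lin='HT',
                              method='cos',
                              aft_nonlin='ht_l2')
# 'ht_l2' appends nn.Hardtanh() followed by Normalize() after the graph layer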
Example #6
    def process(self, input_paths, output_paths):
        # Init steps
        hs = HashtagSplit()
        nr = Normalize()
        ct = Contract()

        # execute pipeline
        for input_path, output_path in zip(input_paths, output_paths):
            # data paths
            path_0 = input_path
            path_1 = output_path[:-4] + '_1' + output_path[-4:]
            path_2 = output_path[:-4] + '_2' + output_path[-4:]
            path_3 = output_path

            # set paths
            hs.set_paths(path_0, path_1)
            nr.set_paths(path_1, path_2)
            ct.set_paths(path_2, path_3)

            # run
            print("starting with " + os.path.basename(input_path))
            hs.run()
            print(os.path.basename(input_path) + ": hashtag done.")
            nr.run()
            print(os.path.basename(input_path) + ": normalize done.")
            ct.run()
            print(os.path.basename(input_path) + ": contract done.")
Example #7
    def __init__(self, n_classes, deno, in_out = None):
        super(Graph_Sim_Mill, self).__init__()
        
        self.num_classes = n_classes
        self.deno = deno

        # num_layers = 2
        # in_out = [2048,512,1024]
        # print 'NUM LAYERS', num_layers, in_out

        if in_out is None:
            in_out = [2048,2048]
        # in_out = [2048,512,2048]
        num_layers = len(in_out)-1
        

        print('NUM LAYERS', num_layers, in_out)

        
        self.linear_layer = nn.Linear(2048, 2048, bias = False)
        # for param in self.linear_layer.parameters():
        #     param.requires_grad = False

        # model_file = '../experiments/just_mill_ht_unit_norm_no_bias_ucf/all_classes_False_just_primary_False_deno_8_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0.001/model_99.pt'
        model_file = '../experiments/just_mill_ht_unit_norm_no_bias_fix_ucf/all_classes_False_just_primary_False_deno_8_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0.001_0.001_0.001__retry/model_99.pt'
        non_lin = 'HT'

        # model_file = '../experiments/just_mill_relu_unit_norm_no_bias_ucf/all_classes_False_just_primary_False_deno_8_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0.0001_128/model_99.pt'
        # non_lin = 'rl'

        model_temp = torch.load(model_file)
        # print model_temp.linear.weight.data.size()
        # print self.linear_layer.weight.data.size()
        # raw_input()
        self.linear_layer.weight.data = model_temp.linear.weight.data
        self.linear_layer.weight.requires_grad = False
        
        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers): 
            self.graph_layers.append(Graph_Layer_Wrapper(in_out[num_layer],in_out[num_layer+1], non_lin))
        
        
        # self.non_lin = nn.Hardtanh()
        last_layer = []
        
        last_layer.append(nn.Hardtanh())
        last_layer.append(Normalize())
        last_layer.append(nn.Dropout(0.5))
        last_layer.append(nn.Linear(in_out[-1],n_classes))
        last_layer = nn.Sequential(*last_layer)
        self.last_layer = last_layer
    def __init__(self, n_classes, deno):
        super(Just_Mill, self).__init__()

        self.num_classes = n_classes
        self.deno = deno

        self.linear = nn.Linear(2048, 64, bias=False)

        self.features = []
        self.features.append(nn.Hardtanh())
        self.features.append(Normalize())
        self.features.append(nn.Dropout(0.5))
        self.features.append(nn.Linear(64, n_classes))
        self.features = nn.Sequential(*self.features)
    def test_normalize(self):
        test = np.arange(1000)

        # normalize
        scaler = Normalize(test)
        normalized = scaler.normalize_data(test)

        min_val = min(normalized)
        max_val = max(normalized)

        # ensure values scaled to range (0, 1)
        self.assertGreaterEqual(min_val, 0.0)
        self.assertLessEqual(max_val, 1.0)

        # denormalize
        denormalized = scaler.denormalize_data(normalized)

        # ensure denormalized values are the same as the original
        for x, y in zip(test, denormalized):
            try:
                self.assertEqual(x, y)
            except AssertionError:
                self.assertAlmostEqual(x, y, 12)
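The scaler-style Normalize(test) used in this test (with normalize_data/denormalize_data) is a different class from the nn.Module sketched earlier. A minimal min-max sketch that would satisfy the assertions, offered only as an assumption about the interface, not the actual class:

import numpy as np


class MinMaxScalerSketch:
    # hypothetical stand-in for the scaler-style Normalize used in the test
    def __init__(self, data):
        self.lo = float(np.min(data))
        self.hi = float(np.max(data))

    def normalize_data(self, data):
        # scale values into the range [0, 1]
        return (np.asarray(data, dtype=float) - self.lo) / (self.hi - self.lo)

    def denormalize_data(self, data):
        # inverse transform; recovers the originals up to floating-point error
        return np.asarray(data, dtype=float) * (self.hi - self.lo) + self.lo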
Example #10
    def test_normalize_equilateral(self):
        # find the Iris data set
        irisFile = os.path.dirname(os.path.realpath(__file__))
        irisFile = os.path.abspath(irisFile + "../../../datasets/iris.csv")

        norm = Normalize()
        result = norm.load_csv(irisFile)
        classes = norm.build_class_map(result, 4)
        norm.norm_col_equilateral(result, 4, classes, 0, 1)
        self.assertEqual(len(result[0]), 6)
        self.assertAlmostEqual(result[0][4], 0.06698, 3)
Example #11
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 aft_nonlin='RL',
                 feat_ret=False):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.feat_ret = feat_ret
        self.deno = deno

        if in_out is None:
            in_out = [2048, 512]

        self.linear_layer = [nn.Linear(in_out[0], in_out[1], bias=True)]
        if aft_nonlin is not None:

            to_pend = aft_nonlin.split('_')
            for tp in to_pend:
                if tp.lower() == 'ht':
                    self.linear_layer.append(nn.Hardtanh())
                elif tp.lower() == 'rl':
                    self.linear_layer.append(nn.ReLU())
                elif tp.lower() == 'l2':
                    self.linear_layer.append(Normalize())
                elif tp.lower() == 'ln':
                    self.linear_layer.append(nn.LayerNorm(in_out[1]))
                elif tp.lower() == 'bn':
                    self.linear_layer.append(
                        nn.BatchNorm1d(in_out[1],
                                       affine=False,
                                       track_running_stats=False))
                elif tp.lower() == 'sig':
                    self.linear_layer.append(nn.Sigmoid())
                else:
                    error_message = 'aft_nonlin token %s not recognized' % tp
                    raise ValueError(error_message)
        self.linear_layer = nn.Sequential(*self.linear_layer)

        last_graph = []
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1], n_classes, bias=True))

        self.last_graph = nn.Sequential(*last_graph)
Example #12
    def __init__(self, n_classes, deno, layer_sizes):
        super(Just_Mill, self).__init__()

        self.num_classes = n_classes
        self.deno = deno

        self.linear = []
        self.linear.append(
            nn.Linear(layer_sizes[0], layer_sizes[1], bias=False))
        self.linear.append(nn.Hardtanh())
        self.linear.append(Normalize())
        self.linear = nn.Sequential(*self.linear)

        # self.features.append(nn.ReLU())
        self.features = []
        self.features.append(nn.Dropout(0.5))
        self.features.append(nn.Linear(layer_sizes[1], n_classes))
        self.features = nn.Sequential(*self.features)
Example #13
    def __init__(self, n_classes, deno, in_out=None):
        super(Graph_Sim_Mill, self).__init__()

        torch.backends.cudnn.deterministic = True
        torch.manual_seed(999)

        self.num_classes = n_classes
        self.deno = deno

        # num_layers = 2
        # in_out = [2048,512,1024]
        # print 'NUM LAYERS', num_layers, in_out

        if in_out is None:
            in_out = [2048, 2048]
        # in_out = [2048,512,2048]
        num_layers = len(in_out) - 1

        print('NUM LAYERS', num_layers, in_out)

        self.linear_layer = nn.Linear(2048, in_out[-1], bias=False)
        non_lin = 'HT'

        # self.linear_layer.weight.data = model_temp.linear.weight.data
        self.linear_layer.weight.requires_grad = False

        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers):
            self.graph_layers.append(
                Graph_Layer_Wrapper(in_out[num_layer], in_out[num_layer + 1],
                                    non_lin))

        # self.non_lin = nn.Hardtanh()
        last_layer = []

        last_layer.append(nn.Hardtanh())
        last_layer.append(Normalize())
        last_layer.append(nn.Dropout(0.5))
        last_layer.append(nn.Linear(in_out[-1], n_classes))
        last_layer = nn.Sequential(*last_layer)
        self.last_layer = last_layer
Example #14
    def mk_input_layers_for_G(self, step):

        n_sty_inp = self.get_n_inp_sty(step)
        self.mixing_matrices = ini_mixing_matrix(n_sty_inp, step + 1)
        mn_inps = [Input([self.latent_size]) for _ in range(n_sty_inp)]
        lct_fake_inp = Input([1])
        dens = [
            Dense(self.latent_size, **kernel_cond)
            for _ in range(self.n_layers_of_mn)
        ]
        nors = [Normalize()(mn_inps[i]) for i in range(n_sty_inp)]
        d = [nors[i] for i in range(n_sty_inp)]
        for i in range(n_sty_inp):
            for j in range(self.n_layers_of_mn):
                d[i] = dens[j](d[i])
        lct = LearnedConstTensor(self.img_shape[0][:2] +
                                 (self.latent_size, ))(lct_fake_inp)
        sty_out = [MixStyle(i, n_sty_inp, step + 1) for i in range(step + 1)]
        for i in range(step + 1):
            sty_out[i] = sty_out[i](d)
        return Model(inputs=[lct_fake_inp] + mn_inps,
                     outputs=[lct] + sty_out,
                     name='input_layers_{}_for_G'.format(str(step)))
def rnn_predict(stock, start, end):
    # get stock data
    try:
        df = get_stock_data(stock, start, end, json=False)
    except Exception:
        # error info
        e = sys.exc_info()
        print(e)
        print("rnn predict fail")
        return e

    # normalize
    scaler = Normalize(df, max=True)
    normalized = scaler.normalize_data(df)

    # get training and testing inputs and outputs
    train_inputs, train_targets, test_inputs, test_targets = train_test_split(
        normalized)

    train_inputs = np.array(train_inputs)
    train_targets = np.array(train_targets)
    test_inputs = np.array(test_inputs)
    test_targets = np.array(test_targets)

    # returns 3d array in format [inputs, timesteps, features]
    train_inputs = to_3d(train_inputs)
    test_inputs = to_3d(test_inputs)

    NN = RNN_V2()
    train_outputs = NN.train(train_inputs, train_targets, epochs=100)
    test_outputs = NN.test(test_inputs)

    # de-normalize
    train_outputs = scaler.denormalize_data(train_outputs)
    train_targets = scaler.denormalize_data(train_targets)
    test_outputs = scaler.denormalize_data(test_outputs)
    test_targets = scaler.denormalize_data(test_targets).T

    # accuracy
    accuracy = 100 - mape(test_targets, test_outputs)

    return df[4:], pd.DataFrame(train_outputs), pd.DataFrame(
        test_outputs), str(round(accuracy, 2))
def calc_linear_regression(coeff, x):
    result = 0
    for i in range(1, len(coeff)):
        result += x[i - 1] * coeff[i]

    result += coeff[0]
    return result


# find the Abalone data set
abaloneFile = os.path.dirname(os.path.realpath(__file__))
abaloneFile = os.path.abspath(abaloneFile + "../../datasets/abalone.csv")

# Normalize abalone file.

norm = Normalize()
abalone_work = norm.load_csv(abaloneFile)

# Make all columns beyond col #1 numeric.
for i in range(1, 9):
    norm.make_col_numeric(abalone_work, i)

# Discover all of the classes for column #1, the gender.
classes = norm.build_class_map(abalone_work, 0)

# Normalize gender one-of-n encoding.
norm.norm_col_one_of_n(abalone_work, 0, classes, 0, 1)

# Separate into input and ideal.

training = np.array(abalone_work)
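The script above is cut off after the "Separate into input and ideal" comment. A hedged continuation, assuming the last column of the encoded abalone rows is the target; the slicing mirrors the polynomial example later in this listing and is an illustration, not the original code:

# hypothetical continuation: split encoded rows into inputs and ideal outputs
training_input = training[:, 0:-1]
training_ideal = training[:, -1:]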
    def predict(self, test_input, custom_input, test_case_count):
        normalize = Normalize()
        if custom_input:
            input_count = 0
            #prediction_df = pd.DataFrame(columns = ["Que No","Questions", "Predicted_Tags"])
            prediction_list = []
            for question in test_input:
                input_count += 1
                question_ = normalize.normalize(question)
                logging.debug('-' * (len(question) + 16))
                logging.debug('Test Case No.{}: {}'.format(
                    input_count, str(question)))
                predicted_tag = self.tag_predictor(question_)
                logging.debug('Predicted Tags: {}'.format(predicted_tag))
                prediction_list.append({
                    'que_no': input_count,
                    'questions': str(question),
                    'predicted_tags': predicted_tag
                })

                #logging.debug('')
            logging.debug('')
            return prediction_list

        else:
            test_idx = np.random.randint(len(test_input), size=test_case_count)
            logging.debug("Predicted Vs Ground Truth for {} sample(s)".format(
                test_case_count))
            logging.debug('-' * 50)
            logging.debug('')
            input_count = 0
            input_predicted_list = []
            prediction_score = 0
            predicted_tag_list = []
            prediction_list = []
            #pd.DataFrame(columns = ["Que No", "Questions", "Ground_Truth","Predicted_Tags"])
            for idx in test_idx:
                input_count += 1
                test_case = idx
                question = str(test_input[test_case])
                logging.debug('')
                logging.debug('-' * 100)
                logging.debug('Test Case No.{}:'.format(input_count))
                logging.debug("Question ID: {}".format(test_case))
                logging.debug('Question: {}'.format(question))
                predicted_tag = self.tag_predictor(
                    normalize.normalize_(question))
                predicted_tag_list.append(predicted_tag)
                ground_truth = self._tag_encoder.inverse_transform(
                    np.array([self._y_test[test_case]]))
                score = 0
                ground_truth_ = [*ground_truth[0]]
                #predicted_tag_ = [*predicted_tag]

                for tag in predicted_tag:
                    tags = [*tag]
                    for tag in tags:
                        if tag in ground_truth_:
                            if (len(tag) > 0):
                                score = 1
                                prediction_score += 1
                            break
                        else:
                            for gt_tag in ground_truth_:
                                if (gt_tag.startswith(tag)
                                        or tag.startswith(gt_tag)
                                    ) and len(gt_tag) > 0:
                                    score = 1
                                    prediction_score += 1
                                    break

                prediction_current = {
                    'que_no': input_count,
                    'questions': question,
                    'ground_truth': str(ground_truth),
                    'predicted_tags': str(predicted_tag)
                }
                prediction_list.append(prediction_current)

                # append row to the dataframe
                input_predicted_list.append(
                    [input_count, ground_truth, predicted_tag, score])

                # log the ground truth & prediction
                logging.debug('Predicted: ' + str(predicted_tag))
                logging.debug('Ground Truth: ' + str(ground_truth))
                logging.debug('\n')

            accuracy = prediction_score / input_count
            self._accuracy = accuracy
            return prediction_list
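A hedged usage sketch for this predict method; "tagger" stands for an instance of the enclosing class (its name is not shown in the listing) and the questions are illustrative:

# hypothetical caller
questions = ["How do I merge two dictionaries in Python?",
             "What is the difference between JOIN and UNION in SQL?"]
predictions = tagger.predict(questions, custom_input=True,
                             test_case_count=len(questions))
for p in predictions:
    print(p['que_no'], p['predicted_tags'])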
Example #18
# Find the AIFH core files
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize


# find the Iris data set
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
result = norm.load_csv(irisFile)

# Setup the first four fields to "range normalize" between -1 and 1.
for i in range(0, 4):
    norm.make_col_numeric(result, i)
    norm.norm_col_range(result, i, -1, 1)

# Discover all of the classes for column #4, the iris species.
classes = norm.build_class_map(result, 4)

# Normalize iris species with equilateral encoding
norm.norm_col_equilateral(result, 4, classes, -1, 1)

# Display the resulting data
norm.display_data(result)
Example #19
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep +
                           "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

k = 3

# find the Iris data set
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set.
res, idx = kmeans2(np.array(iris_data), k)

for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
Example #20
    def build_G(self,
                step,
                input_layers=None,
                output_layers=None,
                merged_old_output_layers=None):

        n_sty_inp = self.get_n_inp_sty(step)
        self.mixing_matrices = ini_mixing_matrix(n_sty_inp, step + 1)
        G = input_layers
        if G is None:
            G = self.mk_input_layers_for_G(step)
        elif len(G.output) < step + 2:
            G.name = 'input_layers_{}_for_G'.format(step - 1)
            print('rebuild input layers... from {} to {}.'.format(
                step - 1, step))
            self.load_weights_by_name(G)
            n_sty_inp = self.get_n_inp_sty(step)
            lct_inp = Input([1])
            lct = None
            sty_inps = [Input([self.latent_size]) for _ in range(n_sty_inp)]
            nors = [Normalize()(inp) for inp in sty_inps]
            dens = []
            d = nors
            for layer in G.layers:
                if isinstance(layer, Dense):
                    dens.append(layer)
                if isinstance(layer, LearnedConstTensor):
                    lct = layer(lct_inp)
            for i in range(n_sty_inp):
                for j in range(self.n_layers_of_mn):
                    d[i] = dens[j](d[i])
            sty_mix = [
                MixStyle(i, n_sty_inp, step + 1)(d) for i in range(step + 1)
            ]
            G = Model(inputs=[lct_inp] + sty_inps,
                      outputs=[lct] + sty_mix,
                      name='input_layers_{}_for_G'.format(str(step)))

        inps = G.input
        G = G(inps)
        styles = G[1:]
        G = G[0]

        if self.generators[0] is None:
            self.generators[0] = self.mk_G_block(0, default_depth_G[0])

        for i in range(step):
            if self.generators[i] is None:
                self.generators[i] = self.mk_G_block(i, default_depth_G[i],
                                                     self.self_attns[i])
            G = self.generators[i]([G, styles[i]])

        old_G = G
        if self.generators[step] is None:
            self.generators[step] = self.mk_G_block(step,
                                                    default_depth_G[step],
                                                    self.self_attns[step])
        G = self.generators[step]([old_G, styles[step]])
        if output_layers is None:
            output_layers = self.mk_output_layers_for_G(step)
        G = output_layers(G)

        if merged_old_output_layers is not None:
            G = self.mk_merge_layers_for_G(
                step, merged_old_output_layers)([old_G, G])

        self.G = Model(inputs=inps, outputs=G)
        self.mix_reg()
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 feat_dim=None,
                 graph_size=None,
                 method='cos',
                 sparsify=[0.8],
                 non_lin='HT',
                 aft_nonlin=None,
                 sigmoid=False,
                 layer_bef=None,
                 graph_sum=False,
                 background=False,
                 just_graph=False):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.background = background

        if self.background:
            assert sigmoid
            n_classes += 1

        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.graph_sum = graph_sum
        self.just_graph = just_graph

        if in_out is None:
            in_out = [2048, 64]

        if feat_dim is None:
            feat_dim = [2048, 64]

        num_layers = len(sparsify)

        print('NUM LAYERS', num_layers, in_out)

        self.bn = None
        # nn.BatchNorm1d(2048, affine = False)
        self.linear_layer = nn.Linear(feat_dim[0], feat_dim[1], bias=True)

        if layer_bef is None:
            self.layer_bef = None
        else:
            self.layer_bef = []
            self.layer_bef.append(
                nn.Linear(layer_bef[0], layer_bef[1], bias=True))
            self.layer_bef.append(nn.ReLU())
            # self.layer_bef.append(Normalize())
            self.layer_bef = nn.Sequential(*self.layer_bef)

        self.graph_layers = nn.ModuleList()

        self.last_graphs = nn.ModuleList()

        for num_layer in range(num_layers):
            if self.sparsify[num_layer] == 'lin':
                lin_curr = []

                if non_lin == 'HT':
                    lin_curr.append(nn.Hardtanh())
                elif non_lin.lower() == 'rl':
                    lin_curr.append(nn.ReLU())
                elif non_lin is not None:
                    error_message = 'non_lin %s not recognized' % non_lin
                    raise ValueError(error_message)

                lin_curr.append(nn.Linear(in_out[0], in_out[1]))

                to_pend = aft_nonlin.split('_')
                for tp in to_pend:
                    if tp.lower() == 'ht':
                        lin_curr.append(nn.Hardtanh())
                    elif tp.lower() == 'rl':
                        lin_curr.append(nn.ReLU())
                    elif tp.lower() == 'l2':
                        lin_curr.append(Normalize())
                    elif tp.lower() == 'ln':
                        lin_curr.append(nn.LayerNorm(in_out[1]))
                    elif tp.lower() == 'bn':
                        lin_curr.append(
                            nn.BatchNorm1d(in_out[1],
                                           affine=False,
                                           track_running_stats=False))
                    else:
                        error_message = 'aft_nonlin token %s not recognized' % tp
                        raise ValueError(error_message)
                lin_curr = nn.Sequential(*lin_curr)
                self.graph_layers.append(lin_curr)
            else:
                self.graph_layers.append(
                    Graph_Layer_Wrapper(in_out[0],
                                        n_out=in_out[1],
                                        non_lin=non_lin,
                                        method=method,
                                        aft_nonlin=aft_nonlin))

            if self.just_graph:
                if sigmoid:
                    aft_nonlin_curr = 'sig'
                else:
                    aft_nonlin_curr = None
                last_graph = Graph_Layer_Wrapper(in_out[-1],
                                                 n_classes,
                                                 non_lin=non_lin,
                                                 method=method,
                                                 aft_nonlin=aft_nonlin_curr)
            else:
                last_graph = []
                last_graph.append(nn.Dropout(0.5))
                last_graph.append(nn.Linear(in_out[-1], n_classes))
                if sigmoid:
                    last_graph.append(nn.Sigmoid())
                last_graph = nn.Sequential(*last_graph)
            self.last_graphs.append(last_graph)

        self.num_branches = num_layers
        print('self.num_branches', self.num_branches)
Example #22
    if args[1] == 0:
        return 1
    return args[0] / args[1]

add_wrapper = FunctionWrapper(add, 2, "+")
sub_wrapper = FunctionWrapper(sub, 2, "-")
mul_wrapper = FunctionWrapper(mul, 2, "*")
div_wrapper = FunctionWrapper(div, 2, "/")

# find the simple polynomial data set
polyFile = os.path.dirname(os.path.realpath(__file__))
polyFile = os.path.abspath(polyFile + "../../datasets/simple-poly.csv")

# Read the polynomial data set.
print('Reading CSV file: ' + polyFile)
norm = Normalize()
poly_work = norm.load_csv(polyFile)
norm.make_col_numeric(poly_work, 0)
norm.make_col_numeric(poly_work, 1)

# Prepare training data.  Separate into input and ideal.
training = np.array(poly_work)
training_input = training[:, 0:1]
training_ideal = training[:, 1:2]

# Calculate the error with MSE.
def score_function(genome):
    # Loop over the training set and calculate the output for each.
    actual_output = []
    for input_data in training_input:
        genome.set_variable_value(["x"], input_data)
Example #23
    def normalize(self, types):
        print("Data Normalize with ", types)
        normalization = Normalize(self.data)
        normalization.normalizing(types, self.__type)
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 feat_dim=None,
                 graph_size=None,
                 method='cos',
                 num_switch=1,
                 focus=0,
                 sparsify=False,
                 non_lin='HT',
                 normalize=[True, True]):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify

        if in_out is None:
            in_out = [2048, 64]
        if feat_dim is None:
            feat_dim = [2048, 64]

        num_layers = len(in_out) - 1
        # non_lin = 'HT'

        print('NUM LAYERS', num_layers, in_out)

        # if pretrained=='ucf':
        #     model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0.001_0.001/model_99.pt'
        # elif pretrained=='activitynet':
        #     model_file = '../experiments/just_mill_flexible_deno_8_n_classes_100_layer_sizes_2048_64_activitynet/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_50_step_50_0.1_0.001_0.001/model_49.pt'
        # elif pretrained=='random':
        #     model_file = '../experiments/just_mill_flexible_deno_8_n_classes_20_layer_sizes_2048_64_ucf/all_classes_False_just_primary_False_limit_500_cw_True_MultiCrossEntropy_100_step_100_0.1_0_0.001/model_99.pt'
        # elif pretrained=='default':
        #     model_file = None
        # else:
        #     error_message = 'Similarity method %s not valid' % method
        #     raise ValueError(error_message)

        # if model_file is not None:
        #     model_temp = torch.load(model_file)
        #     # self.linear_layer.weight.data = model_temp.linear.weight.data
        # else:
        #     print 'NO MODEL FILE AAAAAAAA'

        self.linear_layers = nn.ModuleList()
        for idx_layer_num, layer_num in enumerate(range(num_layers)):

            if non_lin == 'HT':
                non_lin_curr = nn.Hardtanh()
            elif non_lin == 'RL':
                non_lin_curr = nn.ReLU()
            else:
                error_message = 'Non lin %s not valid' % non_lin
                raise ValueError(error_message)

            last_linear = []
            idx_curr = idx_layer_num * 2
            last_linear.append(
                nn.Linear(feat_dim[idx_curr],
                          feat_dim[idx_curr + 1],
                          bias=False))
            last_linear.append(non_lin_curr)
            if normalize[0]:
                last_linear.append(Normalize())
            last_linear.append(nn.Dropout(0.5))
            last_linear.append(nn.Linear(feat_dim[idx_curr + 1], n_classes))
            last_linear = nn.Sequential(*last_linear)
            self.linear_layers.append(last_linear)

        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers):
            self.graph_layers.append(
                Graph_Layer_Wrapper(in_out[num_layer],
                                    n_out=in_out[num_layer + 1],
                                    non_lin=non_lin,
                                    method=method))

        # last_linear = []
        # if non_lin =='HT':
        #     last_linear.append(nn.Hardtanh())
        # elif non_lin =='RL':
        #     last_linear.append(nn.ReLU())
        # else:
        #     error_message = str('Non lin %s not valid', non_lin)
        #     raise ValueError(error_message)

        # last_linear.append(nn.Dropout(0.5))
        # last_linear.append(nn.Linear(in_out[1],n_classes))
        # last_linear = nn.Sequential(*last_linear)
        # self.last_linear = last_linear

        last_graph = []
        if non_lin == 'HT':
            last_graph.append(nn.Hardtanh())
        elif non_lin == 'RL':
            last_graph.append(nn.ReLU())
        else:
            error_message = 'Non lin %s not valid' % non_lin
            raise ValueError(error_message)

        if normalize[1]:
            last_graph.append(Normalize())

        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1], n_classes))
        last_graph = nn.Sequential(*last_graph)
        self.last_graph = last_graph

        self.num_branches = num_layers + 1

        if isinstance(num_switch, int):
            num_switch = [num_switch] * self.num_branches

        self.num_switch = num_switch
        self.epoch_counters = [0] * self.num_branches
        self.focus = focus
        self.epoch_last = 0

        print('self.num_branches', self.num_branches)
        print('self.num_switch', self.num_switch)
        print('self.epoch_counters', self.epoch_counters)
        print('self.focus', self.focus)
        print('self.epoch_last', self.epoch_last)
Example #25
def calc_linear_regression(coeff, x):
    result = 0
    for i in range(1, len(coeff)):
        result += x[i - 1] * coeff[i]

    result += coeff[0]
    return result


# find the Abalone data set
abaloneFile = os.path.dirname(os.path.realpath(__file__))
abaloneFile = os.path.abspath(abaloneFile + "../../datasets/abalone.csv")

# Normalize abalone file.

norm = Normalize()
abalone_work = norm.load_csv(abaloneFile)

# Make all columns beyond col #1 numeric.
for i in range(1, 9):
    norm.make_col_numeric(abalone_work, i)

# Discover all of the classes for column #1, the gender.
classes = norm.build_class_map(abalone_work, 0)

# Normalize gender one-of-n encoding.
norm.norm_col_one_of_n(abalone_work, 0, classes, 0, 1)

# Separate into input and ideal.

training = np.array(abalone_work)
Example #26
# Find the AIFH core files
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep +
                           "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

# find the Wisconsin breast cancer data set
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(dataFile +
                           "../../datasets/breast-cancer-wisconsin.csv")

# Normalize the Wisconsin file.

norm = Normalize()
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)
norm.col_delete(data_file_work, 0)
norm.col_replace(data_file_work, 9, 4, 1, 0)

for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

df = pd.DataFrame(data_file_work)
df.columns = [
    "clump_thickness", "size_uniformity", "shape_uniformity",
    "marginal_adhesion", "epithelial_size", "bare_nucleoli", "bland_chromatin",
    "normal_nucleoli", "mitoses", "class"
]
Example #27
        return 1
    return args[0] / args[1]


add_wrapper = FunctionWrapper(add, 2, "+")
sub_wrapper = FunctionWrapper(sub, 2, "-")
mul_wrapper = FunctionWrapper(mul, 2, "*")
div_wrapper = FunctionWrapper(div, 2, "/")

# find the simple polynomial data set
polyFile = os.path.dirname(os.path.realpath(__file__))
polyFile = os.path.abspath(polyFile + "../../datasets/simple-poly.csv")

# Read the polynomial data set.
print('Reading CSV file: ' + polyFile)
norm = Normalize()
poly_work = norm.load_csv(polyFile)
norm.make_col_numeric(poly_work, 0)
norm.make_col_numeric(poly_work, 1)

# Prepare training data.  Separate into input and ideal.
training = np.array(poly_work)
training_input = training[:, 0:1]
training_ideal = training[:, 1:2]


# Calculate the error with MSE.
def score_function(genome):
    # Loop over the training set and calculate the output for each.
    actual_output = []
    for input_data in training_input:
Example #28
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize
from rbf_network import RbfNetwork
from error import ErrorCalculation
from equilateral import Equilateral


# find the Iris data set
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_work = norm.load_csv(irisFile)

# Extract the original iris species so we can display during the final validation.
ideal_species = [row[4] for row in iris_work]

# Setup the first four fields to "range normalize" between -1 and 1.
for i in range(0, 4):
    norm.make_col_numeric(iris_work, i)
    norm.norm_col_range(iris_work, i, 0, 1)

# Discover all of the classes for column #4, the iris species.
classes = norm.build_class_map(iris_work, 4)
inv_classes = {v: k for k, v in classes.items()}

# Normalize iris species using equilateral
Example #29
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep +
                           "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize
from rbf_network import RbfNetwork
from error import ErrorCalculation
from equilateral import Equilateral

# find the Iris data set
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_work = norm.load_csv(irisFile)

# Extract the original iris species so we can display during the final validation.
ideal_species = [row[4] for row in iris_work]

# Setup the first four fields to "range normalize" between -1 and 1.
for i in range(0, 4):
    norm.make_col_numeric(iris_work, i)
    norm.norm_col_range(iris_work, i, 0, 1)

# Discover all of the classes for column #4, the iris species.
classes = norm.build_class_map(iris_work, 4)
inv_classes = {v: k for k, v in classes.items()}

# Normalize iris species using equilateral
Example #30
# Find the AIFH core files
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize


# find the Wisconsin breast cancer data set
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(dataFile + "../../datasets/breast-cancer-wisconsin.csv")

# Normalize the Wisconsin file.

norm = Normalize()
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)
norm.col_delete(data_file_work, 0)
norm.col_replace(data_file_work, 9, 4, 1, 0)

for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

df = pd.DataFrame(data_file_work)
df.columns = ["clump_thickness", "size_uniformity", "shape_uniformity", "marginal_adhesion", "epithelial_size",
              "bare_nucleoli", "bland_chromatin", "normal_nucleoli", "mitoses", "class"]

train_cols = df.columns[0:9]

# Perform the logistic regression.
Example #31
    def __init__(self,
                 n_classes,
                 deno,
                 in_out=None,
                 feat_dim=None,
                 graph_size=None,
                 method='cos',
                 sparsify=False,
                 non_lin='HT',
                 normalize=[True, True]):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify

        if in_out is None:
            in_out = [2048, 64]
        if feat_dim is None:
            feat_dim = [2048, 64]

        num_layers = len(in_out) - 1

        print('NUM LAYERS', num_layers, in_out)

        self.linear_layers = nn.ModuleList()
        self.linear_layers_after = nn.ModuleList()
        for idx_layer_num, layer_num in enumerate(range(num_layers)):

            if non_lin == 'HT':
                non_lin_curr = nn.Hardtanh()
            elif non_lin == 'RL':
                non_lin_curr = nn.ReLU()
            else:
                error_message = 'Non lin %s not valid' % non_lin
                raise ValueError(error_message)

            idx_curr = idx_layer_num * 2

            self.linear_layers.append(
                nn.Linear(feat_dim[idx_curr],
                          feat_dim[idx_curr + 1],
                          bias=False))

            last_linear = []
            last_linear.append(non_lin_curr)
            if normalize[0]:
                last_linear.append(Normalize())
            last_linear.append(nn.Dropout(0.5))
            last_linear.append(nn.Linear(feat_dim[idx_curr + 1], n_classes))
            last_linear = nn.Sequential(*last_linear)
            self.linear_layers_after.append(last_linear)

        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers):
            self.graph_layers.append(
                Graph_Layer_Wrapper(in_out[num_layer],
                                    n_out=in_out[num_layer + 1],
                                    non_lin=non_lin,
                                    method=method))

        self.num_branches = num_layers + 1

        print('self.num_branches', self.num_branches)
Example #32
    def __init__(self,
                 n_classes,
                 deno,
                 in_out = None,
                 feat_dim = None,
                 in_out_feat = None,
                 graph_size = None,
                 method = 'cos',
                 sparsify = 0.5,
                 non_lin = 'RL',
                 aft_nonlin = 'RL',
                 aft_nonlin_feat = 'RL',
                 sigmoid = False,
                 layer_bef = None,
                 graph_sum = False,
                 background = False,
                 just_graph = False
                 ):
        super(Graph_Multi_Video, self).__init__()
        
        self.num_classes = n_classes
        self.background = background
        
        if self.background:
            assert sigmoid
            n_classes+=1

        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.graph_sum = graph_sum
        self.just_graph = just_graph

        if in_out_feat is None:
            in_out_feat = [2048,1024]
        
        if in_out is None:
            in_out = [1024,512]
        
        if feat_dim is None:
            feat_dim = [1024,256]


        assert feat_dim[0]==in_out_feat[1]==in_out[0]

        # num_layers = 1
        
        # print 'NUM LAYERS', num_layers, in_out
        self.num_branches = 2
        print('self.num_branches', self.num_branches)

        self.bn =None
        # nn.BatchNorm1d(2048, affine = False)
        self.feature = []
        self.feature.append(nn.Linear(in_out_feat[0], in_out_feat[1], bias = True))
        to_pend = aft_nonlin_feat.split('_')
        for tp in to_pend:
            if tp.lower()=='ht':
                self.feature.append(nn.Hardtanh())
            elif tp.lower()=='rl':
                self.feature.append(nn.ReLU())
            elif tp.lower()=='l2':
                self.feature.append(Normalize())
            elif tp.lower()=='ln':
                self.feature.append(nn.LayerNorm(in_out_feat[1]))
            elif tp.lower()=='bn':
                self.feature.append(nn.BatchNorm1d(in_out_feat[1], affine = False, track_running_stats = False))
            elif tp.lower()=='sig':
                self.feature.append(nn.Sigmoid())
            else:
                error_message = 'aft_nonlin token %s not recognized' % tp
                raise ValueError(error_message)

        self.feature = nn.Sequential(*self.feature)
        # self.feature_classifier = nn.Linear(in_out[-1],n_classes)


        self.linear_layer = nn.Linear(feat_dim[0], feat_dim[1], bias = True)
        
        self.graph_layer = Graph_Layer_Wrapper(in_out[0],n_out = in_out[1], non_lin = non_lin, method = method, aft_nonlin = aft_nonlin)
        
        last_graph = []
        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1],n_classes))
        if sigmoid:
            last_graph.append(nn.Sigmoid())
        self.last_graph = nn.Sequential(*last_graph)

        last_feat = []
        last_feat.append(nn.Dropout(0.5))
        last_feat.append(nn.Linear(in_out_feat[-1],n_classes))
        if sigmoid:
            last_feat.append(nn.Sigmoid())
        # last_feat.append(nn.Softmax(dim=0))
        self.last_feat = nn.Sequential(*last_feat)
Example #33
# Find the AIFH core files
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

k = 3

# find the Iris data set
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set.
res, idx = kmeans2(np.array(iris_data), k)

for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
Example #34
class Spider(object):
    header = Headers()
    headers = header.headers()  # initialize with a random request header
    normalize = Normalize()  # normalizes/formats URLs
    items_fans = {}  # dict for storing the follow list
    items_self = {}  # dict for storing profile info
    redis = Redis()
    mongo = Mongo()
    s_time = 0  # start time
    e_time = 0  # end time of the run

    flag = 0  # flag for switching request headers
    default_time = 20

    def start_url(self):
        # seed URLs
        start_urls = [
            'https://weibo.com/p/1004061537790411?is_hot=1',
        ]
        for start_url in start_urls:
            yield start_url

    def downloader(self, url_item, referer, retries_num=4):
        """
            Return the page source.
        """
        print("starting download")
        self.e_time = time.time()  # current time
        time_dif = self.e_time - self.s_time
        if self.flag == 1:
            time_dif = 400
        flag = 0
        if time_dif > 300:
            self.headers = self.header.headers()  # fetch a fresh random request header
            self.s_time = self.e_time

        time.sleep(random.random() * 5 +
                   random.random() * 5)  #+ random.randint(1,5))

        url = url_item[0]
        if referer:  # decide whether a referer (anti-hotlinking) header is needed

            self.headers['Referer'] = referer  # add the referer

            print("url to crawl:", url)
            try:
                response = requests.get(url, headers=self.headers, timeout=30)
                #print(self.headers)
                print("status code:", response.status_code)

                #print(response.text)
                if response.status_code == 200:
                    if len(response.text) > 50000:
                        return response.text
                    else:
                        return None
                else:
                    self.flag = 0  # switch request headers
                    if retries_num > 0:
                        print("download attempt", 4 - retries_num)
                        return self.downloader(url_item, referer, retries_num - 1)
                    else:
                        self.redis.push(url_item)  # re-queue the url after repeated failures
                        return None
            except requests.exceptions.ConnectionError as e:
                print("downloader error for", url)
                print("error details:", str(e))
        else:
            response = requests.get(url, headers=self.headers)
            return response.text

    def parse_follow_page(self, html, referer):
        """
            Extract the page_id from a profile page, build links to the accounts
            this user follows, and extract the following and follower counts.
        """
        print("parser 1")
        p1 = r'<title>(.*?[\u4e00-\u9fa5]{0,})的微博_微博</title>'  # matches whose Weibo page this is
        p3 = r"\$CONFIG\['page_id'\]='(\d.*?)';"  # matches the page_id
        p4 = r"(\d{6})"  # extracts the pid from the page_id

        p5 = r'<strong\sclass=\\"W_f12\\">(\d*?)<\\/strong><span\sclass=\\"S_txt2\\">关注<\\/span>'  # number of accounts followed
        p6 = r'<strong\sclass=\\"W_f12\\">(\d*?)<\\/strong><span\sclass=\\"S_txt2\\">粉丝<\\/span>'  # number of followers

        self.items_self = {}

        self.items_self['collection'] = re.search(p1, html).group(
            1)  # whose profile page; used as the collection name
        self.items_self['page_id'] = re.search(p3, html).group(1)  # page_id
        self.items_self['pid'] = re.search(p4,
                                           self.items_self['page_id']).group(
                                               1)  # pid

        try:
            self.items_self['idol'] = int(re.search(p5, html).group(1))
        except:
            self.items_self['idol'] = '__'  # following count hidden, so the idol list cannot be built
            print("following count not accessible")

        try:
            self.items_self['fans'] = int(re.search(p6, html).group(1))
        except:
            self.items_self['fans'] = 0
            print("follower count not accessible")

        if self.items_self['fans'] > 50000:  # damping factor
            self.items_self['damp'] = 1
        else:
            self.items_self['damp'] = 0.5

        print(self.items_self)
        #self.mongo.save(self.items_self)   # store
        yield self.items_self  # yield the result so it can be stored
        if isinstance(self.items_self['idol'], int):
            for url in self.normalize.nor_follow(
                    self.items_self['page_id']):  # pages of followed accounts
                url_item = [url, self.parse_detail, referer]
                yield url_item  # only the follow-page links need to be yielded; the rest is stored directly
        else:
            yield None

    def parse_detail(self, html, referer):
        """
            Extract each account's follow page and profile link.
        """

        print("parser 2")
        self.items_fans = {}

        p1 = r'<title>(.*?[\u4e00-\u9fa5]{0,})的微博_微博</title>'
        p2 = r'<a\starget=\\"_blank\\"\stitle=\\"(.*?[\u4e00-\u9fa5]{0,})\\"\shref=\\"(.*?)\\"\s>'  # matches the follow list

        try:
            results = re.findall(p2, html)

            for result in results:
                if result:

                    collection = re.search(p1, html).group(1)  # collection name
                    idol_name = result[0]  # name of the followed account
                    link = self.normalize.nor_home(result[1].replace(
                        '\\', ''))  # profile link of the followed account

                    if re.search(r'\?', link):  # store the link only if it contains '?'
                        self.items_fans = {
                            'collection': collection,
                            'idol_name': idol_name,
                            'link': link,
                        }

                        print(self.items_fans)
                        #self.mongo.save(self.items_fans)  # store to the database
                        yield self.items_fans  # yield the result so it can be stored
                        url_item = [
                            self.items_fans['link'], self.parse_follow_page,
                            referer
                        ]
                        yield url_item  # yield the url item for further crawling
                    else:
                        print("link does not match the expected format:", link)
                        yield None
        except:
            print("follow list not accessible")

    def scheduler(self):
        # initialization
        #self.redis.delete()  # controls whether crawling resumes after the crawler was shut down

        if self.redis.llen() == 0:
            for url in self.start_url():
                callback = self.parse_follow_page
                referer = "https://weibo.com"
                url_item = [url, callback, referer]
                self.redis.push(url_item)

        while True:
            print("Starting a crawl step")
            if self.redis.llen():

                url_item = self.redis.pop()

                url = url_item[0]
                callback = url_item[1]
                referer = url_item[2]

                html = self.downloader(url_item, referer=referer)
                if html is not None:
                    print("Length of html:", len(html))

                    for items in callback(html, url):
                        if isinstance(items, list):
                            print("Result is a list")
                            self.redis.push(items)
                        if isinstance(items, dict):
                            print("Result is a dict")
                            self.mongo.save(items)

                        if items is None:
                            pass  # skip users whose follow list is not visible
                else:
                    print("Value of html:", html)
            else:
                break

    def run(self):
        self.scheduler()
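# --- Hedged usage sketch, not part of the original example ---
# The class name below is an assumption; the snippet only shows that start_url(),
# a Redis-backed queue, a downloader, and a Mongo wrapper exist. run() simply
# drives scheduler(): seed the queue from start_url(), then pop
# [url, callback, referer] items, download each page, push yielded lists back
# onto the queue and save yielded dicts to MongoDB.
#
#   spider = WeiboSpider()   # hypothetical class name
#   spider.run()             # crawls until the Redis queue is empty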
    def __init__(
        self,
        n_classes,
        deno,
        in_out=None,
        feat_dim=None,
        graph_size=None,
        method='cos',
        sparsify=False,
        non_lin='HT',
        normalize=[True, True],
        attention=False,
        gk=8,
        aft_nonlin=None,
    ):
        super(Graph_Multi_Video, self).__init__()

        self.num_classes = n_classes
        self.deno = deno
        self.graph_size = graph_size
        self.sparsify = sparsify
        self.gk = gk
        if in_out is None:
            in_out = [2048, 64]
        # if feat_dim is None:
        #     feat_dim = [2048,64]

        num_layers = len(in_out) - 1

        print('NUM LAYERS', num_layers, in_out)

        self.bn = None
        # nn.BatchNorm1d(2048, affine = False)

        # self.linear_layers = nn.ModuleList()
        # self.linear_layers_after = nn.ModuleList()
        # for idx_layer_num,layer_num in enumerate(range(num_layers)):

        #     if non_lin =='HT':
        #         non_lin_curr = nn.Hardtanh()
        #     elif non_lin =='RL':
        #         non_lin_curr = nn.ReLU()
        #     else:
        #         error_message = str('Non lin %s not valid', non_lin)
        #         raise ValueError(error_message)

        #     idx_curr = idx_layer_num*2

        #     self.linear_layers.append(nn.Linear(feat_dim[idx_curr], feat_dim[idx_curr+1], bias = True))

        #     last_linear = []
        #     last_linear.append(non_lin_curr)
        #     if normalize[0]:
        #         last_linear.append(Normalize())
        #     last_linear.append(nn.Dropout(0.5))
        #     last_linear.append(nn.Linear(feat_dim[idx_curr+1],n_classes))
        #     last_linear = nn.Sequential(*last_linear)
        #     self.linear_layers_after.append(last_linear)

        self.graph_layers = nn.ModuleList()
        for num_layer in range(num_layers):
            self.graph_layers.append(
                Graph_Layer_Wrapper(in_out[num_layer],
                                    n_out=in_out[num_layer + 1],
                                    non_lin=non_lin,
                                    method=method,
                                    aft_nonlin=aft_nonlin))

        last_graph = []
        if aft_nonlin is None:
            if non_lin == 'HT':
                last_graph.append(nn.Hardtanh())
            elif non_lin == 'RL':
                last_graph.append(nn.ReLU())
            else:
                error_message = 'Non lin %s not valid' % non_lin
                raise ValueError(error_message)

            if normalize[1]:
                last_graph.append(Normalize())

        last_graph.append(nn.Dropout(0.5))
        last_graph.append(nn.Linear(in_out[-1], n_classes))
        last_graph = nn.Sequential(*last_graph)
        self.last_graph = last_graph

        self.num_branches = 1
        # num_layers+1
        self.attention = attention
        print('self.num_branches', self.num_branches)
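
    # A hedged sketch, not part of the original example: one plausible forward()
    # for this module, chaining the pieces built in __init__. The call signature
    # of Graph_Layer_Wrapper and the handling of `attention` are assumptions.
    def forward_sketch(self, x):
        out = x
        for graph_layer in self.graph_layers:
            out = graph_layer(out)      # graph layer plus its aft_nonlin, keeps one row per node
        scores = self.last_graph(out)   # Dropout(0.5) + Linear(in_out[-1], n_classes)
        return scores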
Beispiel #36
0
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize
from rbf_network import RbfNetwork
from error import ErrorCalculation
from train import TrainAnneal
import numpy as np

# find the Iris data set
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_work = norm.load_csv(irisFile)

# Extract the original iris species so we can display it during the final validation.
ideal_species = [row[4] for row in iris_work]

# Set up the first four fields to "range normalize" between 0 and 1.
for i in range(0, 4):
    norm.make_col_numeric(iris_work, i)
    norm.norm_col_range(iris_work, i, 0, 1)

# Discover all of the classes for column #4, the iris species.
classes = norm.build_class_map(iris_work, 4)
inv_classes = {v: k for k, v in classes.items()}

# Normalize iris species using one-of-n.
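# Hedged continuation (the snippet is cut off here): based on the unit test later
# in this document, the one-of-n encoding step would look roughly like the call
# below; the 0..1 range mirrors the range normalization above and is an assumption.
norm.norm_col_one_of_n(iris_work, 4, classes, 0, 1)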
Beispiel #37
0
    def test_normalize_one_of_n(self):
        # find the Iris data set
        irisFile = os.path.dirname(os.path.realpath(__file__))
        irisFile = os.path.abspath(irisFile + "../../../datasets/iris.csv")

        norm = Normalize()

        result = norm.load_csv(irisFile)

        self.assertEqual(len(norm.column_map), 5)
        self.assertEqual(len(norm.header), 5)
        self.assertEqual(norm.header[0], "sepal_length")
        self.assertEqual(norm.header[1], "sepal_width")
        self.assertEqual(norm.header[2], "petal_length")
        self.assertEqual(norm.header[3], "petal_width")
        self.assertEqual(norm.header[4], "class")
        self.assertTrue("sepal_length" in norm.column_map)
        self.assertTrue("sepal_width" in norm.column_map)
        self.assertTrue("petal_length" in norm.column_map)
        self.assertTrue("petal_width" in norm.column_map)
        self.assertTrue("class" in norm.column_map)
        self.assertEqual(norm.resolve_column("sepal_length"), 0)
        self.assertEqual(norm.resolve_column("sepal_width"), 1)
        self.assertEqual(norm.resolve_column("petal_length"), 2)
        self.assertEqual(norm.resolve_column("petal_width"), 3)
        self.assertEqual(norm.resolve_column("class"), 4)
        self.assertRaises(AIFHError, norm.resolve_column, 6)
        self.assertRaises(AIFHError, norm.resolve_column, "unknown")

        for i in range(0, 4):
            norm.make_col_numeric(result, i)
            norm.norm_col_range(result, i, -1, 1)

        self.assertAlmostEqual(result[0][0], -0.555, 2)
        self.assertAlmostEqual(result[0][1], 0.249, 2)
        self.assertAlmostEqual(result[0][2], -0.864, 2)
        self.assertAlmostEqual(result[0][3], -0.916, 2)

        classes = norm.build_class_map(result, 4)
        norm.norm_col_one_of_n(result, 4, classes, -1, 1)
        self.assertEqual(len(classes), 3)
Beispiel #38
0
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep +
                           "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize
from rbf_network import RbfNetwork
from error import ErrorCalculation
from train import TrainAnneal

# find the Iris data set
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_work = norm.load_csv(irisFile)

# Extract the original iris species so we can display it during the final validation.
ideal_species = [row[4] for row in iris_work]

# Set up the first four fields to "range normalize" between 0 and 1.
for i in range(0, 4):
    norm.make_col_numeric(iris_work, i)
    norm.norm_col_range(iris_work, i, 0, 1)

# Discover all of the classes for column #4, the iris species.
classes = norm.build_class_map(iris_work, 4)
inv_classes = {v: k for k, v in classes.items()}

# Normalize iris species using one-of-n.
Beispiel #39
0
from pyspark import SparkContext
from pyspark.mllib.feature import HashingTF
from pyspark.mllib.feature import IDF
from pyspark.mllib.linalg import Vectors

from normalize import Normalize
from stopWords import StopWords

import math
import codecs

#utils = Utils(15000)
normalize = Normalize()
sc = SparkContext()
docs = sc.textFile("hdfs://localhost:8020/user/manh/crawler_1")

dicStopWords = {}

stopWords = StopWords('../input/stopwords.txt')
print(stopWords)

num = docs.count()
print("num = %s" % (num))

# write out a file in "idf hash" format, used to build the vector space
def writeIdfHash(lst):
#	idf_hash = codecs.open("../output/idf_hash.txt", "wb", "utf8")
	idf_hash = open('../output/idf_hash.txt', 'w')
	i = 0
	for x in lst: