Example #1
0
 def setUp(self):  # pylint: disable=g-missing-super-call
     """Prepare the fixture state shared by the tests.

     Populates three attributes:
       data: the value returned by read_data for ./data/complete_data.
       num_dic: a dict mapping "wing", "ring", "slope" and "negative" to 0.
       num: the number of lines in ./data/complete_data.
     """
     self.data = read_data("./data/complete_data")
     self.num_dic = dict.fromkeys(("wing", "ring", "slope", "negative"), 0)
     # Count lines by streaming the file rather than materialising them all.
     with open("./data/complete_data", "r") as raw_file:
         self.num = sum(1 for _ in raw_file)
Example #2
0
  print("train_length:" + str(len(train_data)))
  print("valid_length:" + str(len(valid_data)))
  print("test_length:" + str(len(test_data)))
  return train_data, valid_data, test_data


# Write data to file
def write_data(data_to_write, path):
  """Writes every item of `data_to_write` to `path`, one JSON value per line.

  Args:
    data_to_write: Iterable of JSON-serializable objects.
    path: Destination file path. The file is opened in "w" mode, so any
      existing content is replaced.
  """
  with open(path, "w") as out_file:
    for record in data_to_write:
      # ensure_ascii=False keeps non-ASCII characters verbatim instead of
      # turning them into \uXXXX escapes.
      out_file.write(json.dumps(record, ensure_ascii=False))
      out_file.write("\n")


if __name__ == "__main__":
  # Load the complete dataset, partition it with person_split using the three
  # name lists below, and write each partition under ./person_split.
  data = read_data("./data/complete_data")
  train_names = [
      "hyw", "shiyun", "tangsy", "dengyl", "jiangyh", "xunkai", "negative3",
      "negative4", "negative5", "negative6"
  ]
  valid_names = ["lsj", "pengxl", "negative2", "negative7"]
  test_names = ["liucx", "zhangxy", "negative1", "negative8"]
  train_data, valid_data, test_data = person_split(data, train_names,
                                                   valid_names, test_names)
  # exist_ok=True replaces the separate exists() check, which could race with
  # another process creating the directory between the check and makedirs().
  os.makedirs("./person_split", exist_ok=True)
  write_data(train_data, "./person_split/train")
  write_data(valid_data, "./person_split/valid")
  write_data(test_data, "./person_split/test")
Example #3
0
    'at_flag', 'dig_ratio', 'special_ch', 'special_ch_kind', 'TLD_id',
    'hash_token_n', 'hostname_a', 'hostname_b', 'hostname_c', 'hostname_ch_n',
    'hostname_d', 'hostname_dig_ratio', 'hostname_e', 'hostname_entropy',
    'hostname_f', 'hostname_g', 'hostname_h', 'hostname_i', 'hostname_is_ip',
    'hostname_j', 'hostname_k', 'hostname_l', 'hostname_len',
    'hostname_letter_ratio', 'hostname_m', 'hostname_n', 'hostname_o',
    'hostname_p', 'hostname_point_n', 'hostname_q', 'hostname_r', 'hostname_s',
    'hostname_std', 'hostname_t', 'hostname_token_n', 'hostname_u',
    'hostname_v', 'hostname_w', 'hostname_x', 'hostname_y', 'hostname_z',
    'pathname_ch_kind', 'pathname_depth', 'pathname_len',
    'pathname_longest_token', 'pathname_std', 'pathname_token_n',
    'search_and_n', 'search_len', 'search_std', 'search_token_n'
]

if __name__ == '__main__':
    data_train, data_cv, data_test = data_split.read_data()

    #  use_sklearn.multi_machine_learing_models(data_train,data_cv)

    print('---------------cv')
    y_cv = data_cv['label'].apply(lambda x: 0 if x == 'good' else 1)
    x_cv = data_cv.drop(['URL', 'label'], axis=1)
    use_sklearn.vote_to_predict(x_cv, y_cv)

    print('---------------test')
    y_test = data_test['label'].apply(lambda x: 0 if x == 'good' else 1)
    x_test = data_test.drop(['URL', 'label'], axis=1)
    use_sklearn.vote_to_predict(x_test, y_test)

    print('---------------combine')
    x_fish = pd.read_csv("fishtank_features.csv")