Ejemplo n.º 1
0
def main():
    """Fit a decision tree on the input file and print how well it does.

    Command-line arguments supply the data file, the tree limits
    (``max_depth`` / ``min_size``) and the ``validate`` flag. The tree is
    fitted and then scored on the very same data ("naive" results). When
    ``validate`` is set, a freshly built tree is additionally evaluated with
    10-fold cross validation.
    """
    cli = setup_argparser().parse_args()

    source = cli.file
    tree_params = {"max_depth": cli.max_depth, "min_size": cli.min_size}
    run_cross_validation = cli.validate

    features, truth = import_file(source)

    model = DecisionTree(**tree_params)
    root = model.fit(features)
    TreeNode.show_tree(root)

    # Scores below come from predicting the data the tree was trained on.
    predicted = model.predict(features)
    precision, recall, f_score = get_metrics(truth, predicted, class_label=1)
    overall = accuracy(truth, predicted)
    print("Naive results")
    print("Accuracy: {}, Precision: {}, Recall: {}, F-1: {}".format(
        overall, precision, recall, f_score))

    if not run_cross_validation:
        return

    folds = CrossValidation(k=10)
    fresh_model = DecisionTree(**tree_params)
    # Only the first two returned values are reported; the rest are ignored.
    train_scores, val_scores, *_ = folds.cross_validate(
        fresh_model, features, truth)
    print("10-fold cross validation")
    print("Training scores: {0}\nValidation scores: {1}".format(
        train_scores, val_scores))
Ejemplo n.º 2
0
    def export_excel_to_python(self,
                               infile,
                               outfile,
                               converter_name,
                               sheet_index=0):
        converter = self.find_converter(converter_name)

        input_path = os.path.join(xlsconfig.INPUT_PATH, infile)
        output_path = os.path.join(xlsconfig.TEMP_PATH, outfile + ".py")
        converter_file = os.path.splitext(converter.__file__)[0] + ".py"

        if xlsconfig.FAST_MODE and util.if_file_newer(
                output_path, input_path) and util.if_file_newer(
                    output_path, converter_file):
            data_module = util.import_file(outfile)

        else:
            print infile, "-", converter_name
            parser = self.parser_class(input_path, converter, sheet_index)
            try:
                parser.run()
            except:
                traceback.print_exc()
                return False

            data_module = self.create_data_module(infile, outfile,
                                                  converter_name, parser)
            if data_module is None: return False

        self.store_data_module(data_module)
        return True
Ejemplo n.º 3
0
def main():
    """Run agglomerative clustering on the input file and plot the result.

    The data file and the desired number of clusters come from the command
    line. The computed labels are compared with the ground-truth clusters
    (Rand score and Jaccard coefficient are logged), and both assignments are
    plotted.
    """
    cli = setup_argparser().parse_args()

    source = cli.file
    cluster_count = cli.num_clusters

    data, truth_clusters = import_file(source, correct_clusters=True)
    points = [Point(row) for row in data]

    model = AgglomerativeClustering(num_clusters=cluster_count)
    labels = model.fit(points)
    logging.info("Labels: {}".format(labels))

    rand = rand_score(truth_clusters, labels)
    logging.info("Rand score: {}".format(rand))

    jaccard = jaccard_coeff(truth_clusters, labels)
    logging.info("Jaccard coefficient: {}".format(jaccard))

    # The data is reduced in dimensionality before each plot so it can be
    # drawn; there are no centroids here, hence the ``None``.
    for assignment, tag in ((truth_clusters, "hierarchical_truth"),
                            (labels, "hierarchical_computed")):
        plot(reduce_dimensionality(data), assignment, None, suffix=tag)
def main():
    """Run DBSCAN on the input file, log agreement scores and plot the result.

    The data file, ``min_points`` and ``eps`` come from the command line. The
    labels produced by DBSCAN are compared with the ground-truth clusters
    (Rand index and Jaccard coefficient are logged), and both assignments are
    plotted.
    """
    cli = setup_argparser().parse_args()
    source = cli.file
    neighbours_needed = cli.min_points
    radius = cli.eps

    logging.info(cli)

    data, truth_clusters = import_file(source, correct_clusters=False)

    model = DBSCAN(eps=radius, min_points=neighbours_needed)
    model.dbscan(data)

    rand = rand_score(truth_clusters, model.labels)
    logging.info("Rand Index: {}".format(rand))

    jaccard = jaccard_coeff(truth_clusters, model.labels)
    logging.info("Jaccard Coefficient: {}".format(jaccard))

    # Original author's note: results barely differ from scikit-learn's
    # DBSCAN, so this looks correct.
    # The data is reduced in dimensionality before each plot so it can be
    # drawn; there are no centroids here, hence the ``None``.
    projected_truth = reduce_dimensionality(data)
    plot(projected_truth, truth_clusters, None, suffix="dbscan_truth")

    projected_computed = reduce_dimensionality(data)
    plot(projected_computed, model.labels, None, suffix="dbscan_computed")
Ejemplo n.º 5
0
def create_header_for_excels(excel_files):
    """Generate headers for every Excel file matched by the convention table.

    Each entry of ``xlsconfig.CONVENTION_TABLE`` is read as
    ``(pattern, converter_name[, new_name[, sheet_index]])``. For every entry
    whose converter does not set ``AUTO_GEN_HEADER`` to a false value,
    ``create_header`` is called for each file in ``excel_files`` whose name
    matches ``pattern`` (``re.match`` semantics, i.e. anchored at the start).

    Processing stops at the first failing ``create_header`` call unless
    ``xlsconfig.FORCE_RUN`` is set.

    Args:
        excel_files: Iterable of Excel file paths relative to
            ``xlsconfig.INPUT_PATH``.
    """
    print("=== create header for excels ...")

    # Pick the header generator matching the configured Excel backend.
    if xlsconfig.USE_OPENPYXL:
        from gen_header_with_openpyxl import create_header
    else:
        from gen_header_with_xlrd import create_header

    sys.path.insert(0, xlsconfig.CONVERTER_PATH)
    try:
        for value in xlsconfig.CONVENTION_TABLE:
            pattern = value[0]
            converter = import_file(xlsconfig.CONVERTER_ALIAS + "." + value[1])
            if not getattr(converter, "AUTO_GEN_HEADER", True):
                continue

            sheet_index = value[3] if len(value) > 3 else 0
            compiled_pattern = re.compile(pattern)

            for infile in excel_files:
                if not compiled_pattern.match(infile):
                    continue

                input_file = os.path.join(xlsconfig.INPUT_PATH, infile)
                if create_header(input_file, converter, sheet_index):
                    continue

                # Header generation failed: abort unless told to carry on.
                if not xlsconfig.FORCE_RUN:
                    return
    finally:
        # Always undo the sys.path change, including on the early return
        # above and when an import or header generation raises.
        sys.path.remove(xlsconfig.CONVERTER_PATH)
Ejemplo n.º 6
0
def main():
    """Train a random forest on the input file and print its metrics.

    All forest hyper-parameters and the data file come from the command
    line. The forest is fitted and scored on the same data ("naive" results),
    then a second, freshly built forest with the same hyper-parameters is
    evaluated with 10-fold cross validation.
    """
    cli = setup_argparser().parse_args()

    source = cli.file
    # Shared by both forests so they are guaranteed to be configured alike.
    forest_params = {
        "num_trees": cli.num_trees,
        "sampling_ratio": cli.sampling_ratio,
        "max_depth": cli.max_depth,
        "min_size": cli.min_size,
        "features_ratio": cli.features_ratio,
    }

    features, truth = import_file(source)

    forest = RandomForest(**forest_params)
    forest.fit(features)

    # Scores below come from predicting the data the forest was trained on.
    predicted = forest.predict(features)
    precision, recall, f_score = get_metrics(truth, predicted, class_label=1)
    overall = accuracy(truth, predicted)
    print("Naive results")
    print("Accuracy: {}, Precision: {}, Recall: {}, F-1: {}".format(
        overall, precision, recall, f_score))

    folds = CrossValidation(k=10)
    fresh_forest = RandomForest(**forest_params)
    # Only the first two returned values are reported; the rest are ignored.
    train_scores, val_scores, *_ = folds.cross_validate(
        fresh_forest, features, truth)
    print("10-fold cross validation")
    print("Training scores: {0}\nValidation scores: {1}".format(
        train_scores, val_scores))
Ejemplo n.º 7
0
	def find_converter(self, name):
		"""Return the converter module for ``name``, importing it on first use.

		Modules are cached in ``self.converter_modules``. On a cache miss the
		module ``xlsconfig.CONVERTER_ALIAS + "." + name`` is imported, tagged
		with ``_name = name`` and stored in the cache before being returned.
		"""
		cached = self.converter_modules.get(name)
		if cached is not None:
			return cached

		module = util.import_file(xlsconfig.CONVERTER_ALIAS + "." + name)
		module._name = name
		self.converter_modules[name] = module
		return module
Ejemplo n.º 8
0
def generate(task_id, token, team):
    """Generate a task instance for ``token`` and record when it was made.

    Loads ``generate.py`` from the task's directory under
    ``configuration['tasks_path']``, calls its ``generate`` function with the
    raw task, the token and the team, persists the result through
    ``write_generated_task`` and finally writes the current Unix time to
    ``<task_dir>/generated/<token>/gen_ts``.
    """
    # Called only for its side effect: make sure the config is available.
    read_task_generation_config(task_id)
    # NOTE(review): "{}" placeholders assume a brace-style logger; the
    # standard library's logging module expects "%s" - confirm which is used.
    logger.info('Generating task {} with token {}', task_id, token)

    task_root = os.path.join(configuration['tasks_path'], str(task_id))
    os.makedirs(task_root, exist_ok=True)

    generator_path = os.path.join(task_root, 'generate.py')
    generator = util.import_file(generator_path)
    source_task = tasks.read_task(task_id)
    produced = generator.generate(task=source_task, token=token, team=team)
    write_generated_task(produced, token)

    stamp_path = os.path.join(task_root, 'generated', token, 'gen_ts')
    with open(stamp_path, 'w') as stamp_file:
        stamp_file.write(str(time.time()))
Ejemplo n.º 9
0
def generate_code():
    """Generate code files for every entry of the ``configures`` module.

    Expects ``configures.py`` in ``xlsconfig.TEMP_PATH``. If the file is
    missing, the error is reported through ``log_error`` and its result is
    returned. Otherwise the module is imported and ``_generate`` is called
    with each entry's ``"types"`` value and its key.
    """
    print("=== 生成代码类文件 ...")

    configure_file_path = os.path.join(xlsconfig.TEMP_PATH, "configures.py")
    if not os.path.exists(configure_file_path):
        return log_error("配置文件'%s'不存在", configure_file_path)

    # TEMP_PATH is only needed on sys.path while importing; remove it again
    # even if the import raises so a failure does not leave sys.path altered.
    sys.path.insert(0, xlsconfig.TEMP_PATH)
    try:
        configure_module = util.import_file("configures")
    finally:
        sys.path.remove(xlsconfig.TEMP_PATH)

    for key, cfg in configure_module.configures.iteritems():
        _generate(cfg["types"], key)
Ejemplo n.º 10
0
    def load_converter(self, name):
        """Import the converter module called ``name`` if it exists.

        The dotted ``name`` is mapped to a ``.py`` file below
        ``xlsconfig.CONVERTER_PATH/xlsconfig.CONVERTER_ALIAS``. Returns the
        imported module (tagged with ``_name = name``), or ``None`` when the
        file does not exist or the module has no ``CONFIG`` attribute.
        """
        path_parts = [
            xlsconfig.CONVERTER_PATH,
            xlsconfig.CONVERTER_ALIAS,
            name.replace('.', '//') + ".py",
        ]
        source_path = os.path.join(*path_parts)

        if os.path.isfile(source_path):
            module = util.import_file(xlsconfig.CONVERTER_ALIAS + "." + name)

            # The name may refer to a folder rather than a real converter,
            # so validate that the module actually defines CONFIG.
            if hasattr(module, "CONFIG"):
                module._name = name
                return module

        return None
Ejemplo n.º 11
0
    def init(self):
        """Load the listener modules and set up the configured bots.

        Every entry of the ``modules_path`` directory whose name does not
        start with an underscore is imported; entries accepted by
        ``util.check_module`` get their ``load`` function called with
        ``self.listeners``. Afterwards one ``TelegramBot`` is created per
        entry in ``self.config['bots']``, its info is refreshed, and it is
        appended to ``self.bots``.
        """
        for entry in os.listdir(self.config['modules_path']):
            if entry[0] == '_':
                continue

            loaded = util.import_file(self.config['modules_path'], entry)
            if not util.check_module(loaded):
                continue

            loaded.load(self.listeners)

        print("Listeners loaded: ", self.listeners)

        # Set up the bots
        for settings in self.config['bots'].values():
            client = twx.botapi.TelegramBot(settings['api_key'])
            # Block until the bot info has been refreshed.
            client.update_bot_info().wait()
            print("Loaded bot: ", client.username)
            self.bots.append(client)
Ejemplo n.º 12
0
def main():
    """Run k-means on the input file, log quality scores and plot the result.

    The data file, number of clusters, iteration limit, tolerance, random
    start flag and optional initial centroids come from the command line.

    Raises:
        ValueError: If initial centroids are given but their count differs
            from the requested number of clusters.
    """
    cli = setup_argparser().parse_args()
    source = cli.file
    cluster_count = cli.num_clusters
    use_random_start = cli.random_start
    iteration_limit = cli.num_iterations
    tolerance = cli.tolerance
    seed_centroids = cli.centroids

    if seed_centroids:
        if len(seed_centroids) != cluster_count:
            raise ValueError(
                "Number of centroids provided does not match number of clusters")

    logging.info(cli)

    data, truth_clusters = import_file(source, correct_clusters=True)
    # The cluster count is taken from the command line, not derived from the
    # ground truth (an earlier variant used len(set(truth_clusters))).

    model = KMeans(num_clusters=cluster_count,
                   tolerance=tolerance,
                   max_iterations=iteration_limit)
    centroids = model.fit(data,
                          initial_centroids=seed_centroids,
                          random_start=use_random_start)
    score = model.score(truth_clusters)

    logging.info("Centroids: {}".format(centroids))
    logging.info("Accuracy Score: {}".format(score))

    rand = rand_score(truth_clusters, model.labels)
    logging.info("Rand Index: {}".format(rand))

    jaccard = jaccard_coeff(truth_clusters, model.labels)
    logging.info("Jaccard Coefficient: {}".format(jaccard))

    # Both the data and the centroids are reduced in dimensionality so that
    # they can be plotted.
    projected_truth = reduce_dimensionality(data)
    plot(projected_truth, truth_clusters, None, suffix="kmeans_truth")

    projected_computed = reduce_dimensionality(data)
    plot(projected_computed,
         model.labels,
         reduce_dimensionality(centroids),
         suffix="kmeans_computed")
Ejemplo n.º 13
0
def get_options():
    """
    Parse all required options from the command line, prompting on the
    terminal for anything that was not supplied.

    The returned namespace carries ``auth_type`` plus the credentials that
    belong to it, ``server`` together with the derived ``address`` and
    ``port`` (default 25565), ``ignore``, ``bot_path`` and, when a bot file
    was given, ``Bot`` (the ``Bot`` attribute of the imported bot module).

    Raises:
        ValueError: For an unknown authentication type, a malformed server
            address, or a bot path that does not point to a file.
        NotImplementedError: Always, when the authentication type is
            'MCLeaks' (not implemented yet).
    """
    parser = ArgumentParser(prog="CancerCraft")
    auth_type = parser.add_subparsers(dest='auth_type', help='choose either Mojang or MCLeaks as authentication type')

    auth_type_Mojang = auth_type.add_parser('Mojang', help='Mojang authentication')
    auth_type_Mojang.add_argument('-u', '--user', dest='username', type=str, help='Mojang username', default=None)
    auth_type_Mojang.add_argument('-p', '--pass', dest='password', type=str, help='Mojang password', default=None)

    auth_type_MCLeaks = auth_type.add_parser('MCLeaks', help='MCLeaks authentication')
    auth_type_MCLeaks.add_argument('-n', '--name', dest='name', type=str, help='name of the cached MCLeaks account (leave blank to use a new one)', default=None)

    parser.add_argument('-s', '--server', dest='server', type=str, help='server host or host:port (enclose IPv6 addresses in square brackets)', default=None)

    parser.add_argument('-b', '--bot', dest='bot_path', type=str, help='relative/absoulte path of the bot file (relative path from: ./bots)', default=None)

    parser.add_argument('-i', '--ignore', dest='ignore', action='store_true', help='will assume that non-required variables such as \'name\' were left blank on purpose and thus ignore them', default=False)

    options = parser.parse_args()

    if not options.auth_type:
        options.auth_type = input('Enter the desired authentication type (Mojang | MCLeaks): ')

    # The credential attributes only exist when the matching sub-command was
    # given on the command line, hence getattr() with a default: if the type
    # was entered at the prompt above, they are missing from the namespace.
    # TODO maybe don't be case-sensitive
    if options.auth_type == 'Mojang':
        if not getattr(options, 'username', None):
            options.username = input('Enter your username: ')
        if not getattr(options, 'password', None):
            # TODO 'leave blank for offline mode' (combined with -b)
            options.password = getpass('Enter your password: ')
    elif options.auth_type == 'MCLeaks':
        if not getattr(options, 'name', None):
            if options.ignore:
                options.name = None
            else:
                options.name = input('Enter the name of a cached MCLeaks account (leave blank to use a new one): ')
    else:
        raise ValueError(f'Invalid authentication type: \'{options.auth_type:s}\'')

    if not options.server:
        options.server = input('Enter server host or host:port (enclose IPv6 addresses in square brackets): ')

    # Either a plain host (no brackets or colons) or a bracketed address,
    # optionally followed by ":<port>".
    match = re.match(r'((?P<host>[^\[\]:]+)|\[(?P<addr>[^\[\]]+)\])'
                     r'(:(?P<port>\d+))?$', options.server)

    if match is None:
        raise ValueError(f'Invalid server address: \'{options.server:s}\'')

    options.address = match.group('host') or match.group('addr')
    options.port = int(match.group('port') or 25565)

    # With --ignore a missing bot path is taken as intentional.
    if not options.ignore and not options.bot_path:
        options.bot_path = input('Enter the relative/absoulte path of the bot file (relative path from: ./bots): ')

    if options.bot_path:
        directory = path.dirname(path.realpath(__file__))
        if not options.bot_path.endswith('.py'):
            options.bot_path += '.py'

        # Prefer a file inside ./bots next to this module; otherwise the
        # path must exist as given.
        if path.isfile(f'{directory:s}/bots/{options.bot_path:s}'):
            options.bot_path = f'{directory:s}/bots/{options.bot_path:s}'
        elif not path.isfile(options.bot_path):
            raise ValueError(f'Invalid bot file path: \'{options.bot_path:s}\'')

        options.Bot = util.import_file('Bot', options.bot_path).Bot

    # TODO implement
    if options.auth_type == 'MCLeaks':
        raise NotImplementedError

    return options