Example #1
def generate_topics_for_file(data_dir, fname, lda, topic_idx, dictionary):
    topic_id = 0
    #fname_in = os.path.join(data_dir, 'lif', fname[:-5] + '.lif')
    fname_in = os.path.join(data_dir, 'lif', fname)
    fname_out = os.path.join(data_dir, 'top', fname[:-5] + '.lif')
    ensure_directory(fname_out)
    # lif_in = Container(fname_in).payload
    try:
        lif_in = LIF(fname_in)
    except FileNotFoundError:
        print("Warning: file '%s' does not exist" % fname_in)
        return
    lif_out = LIF(json_object=lif_in.as_json())
    # the following three are just to save some space, we get them from the lif
    # file anyway
    lif_out.text.value = None
    lif_out.text.source = fname_in
    lif_out.metadata = {}
    topics_view = _create_view()
    lif_out.views = [topics_view]
    topics_view.annotations.append(markable_annotation(lif_in))
    doc = prepare_text_for_lda(lif_in.text.value)
    bow = dictionary.doc2bow(doc)
    for topic in lda.get_document_topics(bow):
        topic_id += 1
        # these are tuples of topic_id and score
        lemmas = get_lemmas_from_topic_name(topic_idx.get(topic[0]))
        # print('   %3d  %.04f  %s' % (topic[0], topic[1], lemmas))
        topics_view.annotations.append(
            topic_annotation(topic, topic_id, lemmas))
    lif_out.write(fname=fname_out, pretty=True)
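Note: every example on this page calls an ensure_directory helper whose definition is not shown here, and the projects use slightly different variants (some pass a directory, others a file path, and Example #27 passes two arguments and uses the return value). As a rough sketch only, under the assumption that the helper simply guarantees that a directory exists, it might look like this:

import os

def ensure_directory(directory):
    # Hedged sketch, not the implementation used by the projects above:
    # create the directory (and any missing parents) if it does not exist.
    # Several examples instead pass a file path and expect its parent
    # directory to be created, or pair this helper with ensure_file_dir.
    os.makedirs(directory, exist_ok=True)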
Example #2
def process_list_element(data_dir, n, fname):
    print("%07d  %s" % (n, fname))
    jsn_file = os.path.join(data_dir, 'jsn', fname)
    lif_file = os.path.join(data_dir, 'lif', fname[:-4] + 'lif')
    txt_file = os.path.join(data_dir, 'txt', fname[:-4] + 'txt')
    ensure_directory(lif_file, txt_file)
    create_lif_file(jsn_file, lif_file, txt_file)
Example #3
def generate_metadata(data_dir, fname):

    subdir = os.path.split(fname)[0]
    lif_file = os.path.join(data_dir, 'lif', subdir, "tesseract-300dpi-20p.lif")
    ner_file = os.path.join(data_dir, 'ner', subdir, "%s.ner.lif" % subdir)
    mta_file = os.path.join(data_dir, 'mta', subdir, "%s.mta.lif" % subdir)
    ensure_directory(mta_file)

    lif = Container(lif_file).payload
    lif_ner = Container(ner_file).payload
    lif_mta = LIF(json_object=lif.as_json())
    lif_mta.text.value = None
    lif_mta.text.fname = lif_file
    lif_mta.views = []
    lif.metadata["authors"] = []
    lif.metadata["year"] = None

    page_view = lif.get_view("pages")
    ner_view = lif_ner.get_view('v2')

    window = _get_window(page_view)
    lif.metadata["authors"] = _get_authors(lif, ner_view, window)
    lif.metadata["year"] = _get_year(ner_view, window)

    lif_mta.write(fname=mta_file, pretty=True)
Example #4
def compile_all_using_make_config(abis):
	import time
	start_time = time.time()

	std_includes = make_config.get_path("toolchain/stdincludes")
	cache_dir = make_config.get_path("toolchain/build/gcc")
	ensure_directory(cache_dir)
	mod_structure.cleanup_build_target("native")

	overall_result = CODE_OK
	for native_dir in make_config.get_filtered_list("compile", prop="type", values=("native",)):
		if "source" not in native_dir:
			print("skipped invalid native directory json", native_dir, file=sys.stderr)
			overall_result = CODE_INVALID_JSON
			continue
		for native_dir_path in make_config.get_paths(native_dir["source"]):
			if os.path.isdir(native_dir_path):
				directory_name = os.path.basename(native_dir_path)
				result = build_native_dir(
					native_dir_path,
					mod_structure.new_build_target("native", directory_name + "{}"),
					os.path.join(cache_dir, directory_name),
					abis,
					std_includes,
					BaseConfig(native_dir["rules"] if "rules" in native_dir else {})
				)
				if result != CODE_OK:
					overall_result = result
			else:
				print("skipped non-existing native directory path", native_dir["source"], file=sys.stderr)
				overall_result = CODE_INVALID_PATH

	mod_structure.update_build_config_list("nativeDirs")
	print(f"completed native build in {int((time.time() - start_time) * 100) / 100}s with result {overall_result} - {'OK' if overall_result == CODE_OK else 'ERROR'}")
	return overall_result
Example #5
def process_list_element(source_dir, data_dir, fname, test=False):
    src_file = os.path.join(source_dir, fname)
    lif_file = os.path.join(data_dir, 'lif', fname[:-4] + '.lif')
    if test:
        test_lif_file(lif_file)
    else:
        ensure_directory(lif_file)
        create_lif_file(src_file, lif_file)
Example #6
	def cleanup_build_target(self, target_type_name):
		target_type = BUILD_TARGETS[target_type_name]
		self.targets[target_type_name] = []
		if target_type.directory == "":
			return
		directory = os.path.join(self.directory, target_type.directory)
		clear_directory(directory)
		ensure_directory(directory)
Example #7
def split_encounter_files(
        cohort_file,
        hourly_file,
        out_dir,
        cohort_encounter_column="encounter_id",
        hourly_encounter_column="pat_enc_csn_id",
        chunksize=10**6,
        split_cohort=False):
    logging.info("Splitting dataset to individual files for each encounter ID to {}".format(out_dir))
    utils.ensure_directory(out_dir)
    clean_data_dir(out_dir)

    # Initialize all encounter files with cohort data
    if split_cohort:
        logging.info("Splitting cohort-level data from {} to individual encounter files".format(cohort_file))
        cohort_df = pd.read_csv(cohort_file, sep='\t')
        cohort_encounter_ids = sorted(cohort_df[cohort_encounter_column].unique())
        with tqdm(total=len(cohort_df)) as t:
            for ei, encounter_id in enumerate(cohort_encounter_ids):
                enc_chunk = cohort_df.loc[(cohort_df[cohort_encounter_column] == encounter_id), :]
                encounter_filepath = os.path.join(out_dir, "enc_{}.h5".format(encounter_id))
                enc_chunk.to_hdf(encounter_filepath, key="cohort", append=True, mode='a', format='t')
                t.update()
                # if ei >= 99: break

    logging.info("Splitting hourly-level data from {} to individual encounter files".format(hourly_file))
    hourly_df_chunks = pd.read_csv(hourly_file, sep='\t', chunksize=chunksize)
    num_rows = utils.count_csv_rows(hourly_file, sep='\t')

    num_chunks = math.ceil(num_rows / chunksize)
    logging.info("Reading {} chunks of size {} from {} rows".format(num_chunks, chunksize, num_rows))
    start_flag = True
    unknown_encounters = set()
    known_encounter_count = 0
    with tqdm(total=num_chunks, desc="Chunks") as t1:
        for ci, chunk in enumerate(hourly_df_chunks):
            if ci == 0:
                logging.info("Data types:\n{}".format(pformat(chunk.dtypes)))

            chunk_encounter_ids = sorted(chunk[hourly_encounter_column].unique())
            with tqdm(total=len(chunk_encounter_ids), desc="Encounter IDs in Chunk") as t2:
                for ei, encounter_id in enumerate(chunk_encounter_ids):
                    enc_chunk = chunk.loc[(chunk[hourly_encounter_column] == encounter_id), :]
                    encounter_filepath = os.path.join(out_dir, "itan_hourly_enc_{}.h5".format(encounter_id))
                    if not os.path.isfile(encounter_filepath):
                        logging.debug("Adding hourly data to encounter with unknown cohort data: {}".format(encounter_id))
                        unknown_encounters.add(encounter_id)
                    else:
                        known_encounter_count += 1
                    logging.debug("Appending {} samples for encounter ID {} to {}".format(len(enc_chunk), encounter_id, encounter_filepath))
                    enc_chunk.to_hdf(encounter_filepath, key="hourly", append=True, mode='a', format='t')#, complib='bzip2', complevel=complevel)
                    t2.update()
                    # if ei >= 99: break
            t1.update()
            # if ci >= 0: break

    logging.info("Added hourly data to {} encounter files with unknown IDs.  {} with known IDs".format(
        len(unknown_encounters), known_encounter_count))
Example #8
    def save(self, json_path):
        # Save parameters to json file
        import utils
        import pathlib
        utils.ensure_directory(os.path.dirname(json_path))
        for k, v in self.__dict__.items():
            if type(v) is pathlib.PosixPath:
                self.__dict__[k] = str(v)
        with open(json_path, 'w') as f:
            json.dump(self.__dict__, f, indent=4)
Example #9
def make_dataset(first_directory, second_directory, output_directory):
    """Create and save training, validation, and test datasets.

    Datasets will be saved as numpy arrays as the files train_data.npy,
    train_labels.npy, validate_data.npy, validate_labels.npy, test_data.npy,
    and test_labels.npy in the output directory.

    Args:
        first_directory (str): The relative path to a directory of images in
            the first category the neural network should distinguish between.
        second_directory (str): The relative path to a directory of images in
            the second category the neural network should distinguish between.
        output_directory (str): The relative path to a directory for the output
            nparrays to be saved to.
    """
    pairs = list(iter_image_paths(first_directory, second_directory))
    numpy.random.shuffle(pairs)
    paths, indicators = zip(*pairs)

    n = len(paths)
    # integer division so the results can be used as slice indices in Python 3
    n_train = 8 * n // 10
    n_validate = 1 * n // 10
    # n_test = n - (n_train + n_validate)

    ensure_directory(output_directory)

    train_data = numpy.array(load_images(paths[:n_train]))
    train_labels = numpy.array(indicators[:n_train])

    numpy.save(
        '{}/train_data.npy'.format(output_directory),
        train_data.astype(numpy.float32))
    numpy.save(
        '{}/train_labels.npy'.format(output_directory),
        train_labels.astype(numpy.int32))

    validate_data = numpy.array(load_images(paths[n_train:n_train+n_validate]))
    validate_labels = numpy.array(indicators[n_train:n_train+n_validate])

    numpy.save(
        '{}/validate_data.npy'.format(output_directory),
        validate_data.astype(numpy.float32))
    numpy.save(
        '{}/validate_labels.npy'.format(output_directory),
        validate_labels.astype(numpy.int32))

    test_data = numpy.array(load_images(paths[n_train+n_validate:]))
    test_labels = numpy.array(indicators[n_train+n_validate:])

    numpy.save(
        '{}/test_data.npy'.format(output_directory),
        test_data.astype(numpy.float32))
    numpy.save(
        '{}/test_labels.npy'.format(output_directory),
        test_labels.astype(numpy.int32))
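For context, the .npy files written above can be loaded back with numpy.load. A brief hedged sketch (the 'dataset' output directory name is an assumption; the file names come from the docstring above):

import numpy

# Load the training split written by make_dataset(..., output_directory='dataset').
train_data = numpy.load('dataset/train_data.npy')      # float32 image data
train_labels = numpy.load('dataset/train_labels.npy')  # int32 category indicators
print(train_data.shape, train_labels.shape)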
Example #10
def run_tarsqi_for_file(data_dir, fname):
    lif_file = os.path.join(data_dir, 'lif', fname[:-4] + '.lif')
    ttk_file = os.path.join(data_dir, 'ttk', fname[:-4] + '.lif')
    ensure_directory(ttk_file)
    lif = Container(lif_file).payload
    text = lif.text.value
    doc = parse_text(text)
    if COMPRESS:
        with gzip.open(ttk_file + '.gz', 'wb') as fh:
            doc.print_all_lif(fh)
    else:
        with open(ttk_file, 'w') as out:
            doc.print_all_lif(out)
Example #11
def lookup_technologies(data_dir, fname):
    subdir = os.path.split(fname)[0]
    pos_file = os.path.join(data_dir, 'pos', subdir, "%s.pos.lif" % subdir)
    tex_file = os.path.join(data_dir, 'tex', subdir, "%s.lup.lif" % subdir)
    ensure_directory(tex_file)
    lif = Container(pos_file).payload
    lif_tex = LIF(json_object=lif.as_json())
    pos_view = lif.get_view('v2')
    tex_view = create_view('tex', 'Technology', 'dtriac-pipeline:lookup.py')
    lif_tex.views = [tex_view]
    tokens = [a for a in pos_view.annotations if a.type.endswith('Token')]
    _lookup_technologies_in_tokens(lif, tokens, tex_view)
    lif_tex.write(fname=tex_file, pretty=True)
Example #12
def make_dataset(first_directory, second_directory, output_directory):
    """Create and save training, validation, and test datasets.

    Datasets will be saved as numpy arrays as the files train_data.npy,
    train_labels.npy, validate_data.npy, validate_labels.npy, test_data.npy,
    and test_labels.npy in the output directory.

    Args:
        first_directory (str): The relative path to a directory of images in
            the first category the neural network should distinguish between.
        second_directory (str): The relative path to a directory of images in
            the second category the neural network should distinguish between.
        output_directory (str): The relative path to a directory for the output
            nparrays to be saved to.
    """
    pairs = list(iter_image_paths(first_directory, second_directory))
    numpy.random.shuffle(pairs)
    paths, indicators = zip(*pairs)

    n = len(paths)
    # integer division so the results can be used as slice indices in Python 3
    n_train = 8 * n // 10
    n_validate = 1 * n // 10
    # n_test = n - (n_train + n_validate)

    ensure_directory(output_directory)

    train_data = numpy.array(load_images(paths[:n_train]))
    train_labels = numpy.array(indicators[:n_train])

    with open('{}/train_data.npy'.format(output_directory), 'wb') as f:
        numpy.save(f, train_data.astype(numpy.float32))
    with open('{}/train_labels.npy'.format(output_directory), 'wb') as f:
        numpy.save(f, train_labels.astype(numpy.int32))

    validate_data = numpy.array(
        load_images(paths[n_train:n_train + n_validate]))
    validate_labels = numpy.array(indicators[n_train:n_train + n_validate])

    with open('{}/validate_data.npy'.format(output_directory), 'wb') as f:
        numpy.save(f, validate_data.astype(numpy.float32))
    with open('{}/validate_labels.npy'.format(output_directory), 'wb') as f:
        numpy.save(f, validate_labels.astype(numpy.int32))

    test_data = numpy.array(load_images(paths[n_train + n_validate:]))
    test_labels = numpy.array(indicators[n_train + n_validate:])

    with open('{}/test_data.npy'.format(output_directory), 'wb') as f:
        numpy.save(f, test_data.astype(numpy.float32))
    with open('{}/test_labels.npy'.format(output_directory), 'wb') as f:
        numpy.save(f, test_labels.astype(numpy.int32))
Example #13
def main():
    args = parser.parse_args()
    data_json = read_dataset(args.data)

    processor = TextProcessor()
    classifier = Classifier(processor)
    classifier.train(data_json)

    serialized_classifier = classifier.dump()

    ensure_directory(args.output)
    with open(args.output, 'w') as f:
        f.write(serialized_classifier)
        f.write(os.linesep)
Example #14
def task_build_package():
    import shutil
    output_dir = get_make_config().get_path("output")
    output_file = get_make_config().get_path("mod.icmod")
    output_file_tmp = get_make_config().get_path("toolchain/build/mod.zip")
    ensure_directory(output_dir)
    ensure_file_dir(output_file_tmp)
    if os.path.isfile(output_file):
        os.remove(output_file)
    if os.path.isfile(output_file_tmp):
        os.remove(output_file_tmp)
    shutil.make_archive(output_file_tmp[:-4], 'zip', output_dir)
    os.rename(output_file_tmp, output_file)
    return 0
Example #15
def main():
    args = parser.parse_args()
    data_json = read_dataset(args.data)

    processor = TextProcessor()
    classifier = Classifier(processor)
    classifier.train(data_json)

    serialized_classifier = classifier.dump()

    ensure_directory(args.output)
    with open(args.output, 'w') as f:
        f.write(serialized_classifier)
        f.write(os.linesep)
Example #16
def task_build_package():
	import shutil
	config = get_make_config()
	output_dir = config.get_path(os.path.join("output/debug", config.get_mod_dir()))
	ensure_directory(config.get_path("output/release"))
	output_file = config.get_path("output/release/"+config.get_mod_dir() + ".icmod")
	output_file_tmp = config.get_path("toolchain/build/mod.zip")
	ensure_directory(output_dir)
	ensure_file_dir(output_file_tmp)
	if os.path.isfile(output_file):
		os.remove(output_file)
	if os.path.isfile(output_file_tmp):
		os.remove(output_file_tmp)
	shutil.make_archive(output_file_tmp[:-4], 'zip', output_dir)
	os.rename(output_file_tmp, output_file)
	return 0
Example #17
def task_build_package():
    import shutil
    config = get_make_config()
    output_dir = config.get_project_path("output")
    output_file = config.get_project_path(
        config.get_value("currentProject", "mod") + ".icmod")
    output_file_tmp = config.get_path("toolchain/build/mod.zip")
    ensure_directory(output_dir)
    ensure_file_dir(output_file_tmp)
    if os.path.isfile(output_file):
        os.remove(output_file)
    if os.path.isfile(output_file_tmp):
        os.remove(output_file_tmp)
    shutil.make_archive(output_file_tmp[:-4], 'zip', output_dir)
    os.rename(output_file_tmp, output_file)
    return 0
Example #18
    def __init__(self, r, c, rootDir, date):
        super().__init__(r, c)
        self.dx = 120
        self.dy = 90
        self.rows = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
        self.cols = [
            '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'
        ]
        self.wells = ['a', 'b', 'c']
        self.resizedpath = Path(rootDir).joinpath("Image_Data", date,
                                                  "Miniatures")
        ensure_directory(self.resizedpath)
        self.miniatures = [
            os.path.join(self.resizedpath, file)
            for file in os.listdir(self.resizedpath)
            if os.path.splitext(file)[1] in Ext
        ]
Example #19
def task_build_package():
    import shutil
    config = get_make_config()
    output_dir = config.get_path("output")
    mod_folder = config.get_value("make.modFolder")
    output_file = config.get_path(mod_folder + ".icmod")
    output_root_tmp = config.get_path("toolchain/build")
    output_dir_tmp = output_root_tmp + "/" + mod_folder
    output_file_tmp = output_root_tmp + "/mod.zip"
    ensure_directory(output_dir)
    ensure_file_dir(output_file_tmp)
    if os.path.isfile(output_file):
        os.remove(output_file)
    if os.path.isfile(output_file_tmp):
        os.remove(output_file_tmp)
    shutil.move(output_dir, output_dir_tmp)
    shutil.make_archive(output_file_tmp[:-4], 'zip', output_root_tmp,
                        mod_folder)
    os.rename(output_file_tmp, output_file)
    shutil.move(output_dir_tmp, output_dir)
    return 0
Example #20
def generate_sentence_types(data_dir, fname):

    subdir = os.path.split(fname)[0]
    lif_file = os.path.join(data_dir, 'lif', subdir, "tesseract-300dpi-20p.lif")
    spl_file = os.path.join(data_dir, 'spl', subdir, "%s.spl.lif" % subdir)
    sen_file = os.path.join(data_dir, 'sen', subdir, "%s.sen.lif" % subdir)
    ensure_directory(sen_file)

    if DEBUG:
        SENTS.write(">>> %s\n>>> %s\n>>> %s\n\n" % ('-' * 100, fname, '-' * 100))

    lif = Container(lif_file).payload
    lif_spl = Container(spl_file).payload
    lif_sen = LIF(json_object=lif.as_json())

    spl_sentences_view = lif_spl.get_view('v2')
    new_sentences_view = _create_view()
    lif_sen.views = [new_sentences_view]

    good_sentences = 0
    bad_sentences = 0

    for anno in spl_sentences_view.annotations:
        if anno.type.endswith('Sentence'):
            sc = SentenceClassifier(lif, anno, WORDS)
            if sc.is_crap():
                if DEBUG:
                    SENTS.write("---- %f\n%s\n\n" % (sc.ratio, repr(sc.text)))
                anno.features['type'] = 'crap'
                bad_sentences += 1
            else:
                if DEBUG:
                    SENTS.write("++++ %f\n%s\n\n" % (sc.ratio, repr(sc.text)))
                anno.features['type'] = 'normal'
                good_sentences += 1
            new_sentences_view.annotations.append(anno)
    if DEBUG:
        SENTS.write("\nTOTAL GOOD = {:d}\nTOTAL BAD  = {:d}\n\n\n".format(good_sentences, bad_sentences))

    lif_sen.write(fname=sen_file, pretty=True)
Example #21
def generate_topics_for_file(data_dir, fname, lda, topic_idx, dictionary):
    topic_id = 0
    fname_in = os.path.join(data_dir, 'lif', fname[:-4] + '.lif')
    fname_out = os.path.join(data_dir, 'top', fname[:-4] + '.lif')
    ensure_directory(fname_out)
    lif_in = Container(fname_in).payload
    lif_out = LIF(json_object=lif_in.as_json())
    # just to save some space, we get them from the lif file anyway
    lif_out.metadata = {}
    topics_view = _create_view()
    lif_out.views = [topics_view]
    topics_view.annotations.append(markable_annotation(lif_in))
    doc = prepare_text_for_lda(lif_in.text.value)
    bow = dictionary.doc2bow(doc)
    for topic in lda.get_document_topics(bow):
        topic_id += 1
        # these are tuples of topic_id and score
        lemmas = get_lemmas_from_topic_name(topic_idx.get(topic[0]))
        # print('   %3d  %.04f  %s' % (topic[0], topic[1], lemmas))
        topics_view.annotations.append(
            topic_annotation(topic, topic_id, lemmas))
    lif_out.write(fname=fname_out, pretty=True)
Example #22
def main():
    from config import config_object
    try:
        username = sys.argv[1]
        password = sys.argv[2]
        logger.info(f"USERNAME=={username} and PASSWORD == {password}")
    except IndexError:
        logger.error("Please provide username and password for your github")
        return
    print("Execution started")

    try:
        inst = GithubIdentity("github.com", "Macpod")
        inst.add(username, password)

    except Exception as e:
        logger.error(e)

    #generate_new_keys(username, password)
    # dirname = os.path.dirname(os.path.abspath(__file__))
    # output_directory = os.path.join(dirname, "account")
    # if args.lfs_clone:
    #     check_git_lfs_install()
    logger.info('Backing up user {0} to {1}'.format(
        username, config_object.GITHUB_OUTPUT_DIR))

    ensure_directory(config_object.GITHUB_OUTPUT_DIR)

    authenticated_user = get_authenticated_user(username, password)

    logger.info(
        f"The user for which the backup will happend {authenticated_user['login']}"
    )
    repositories = retrieve_repositories(username, password)
    #repositories = filter_repositories(args, repositories)
    backup_repositories(username, password, config_object.GITHUB_OUTPUT_DIR,
                        repositories)
Example #23
def main():
    # -------------------------------
    #         PARSE ARGUMENTS
    # -------------------------------
    arg_names = ['command', 'dataset_name', 'dataset_type', 'snapshot_num']
    if len(sys.argv) != 4:
        print("Please check the arguments.\n")
        print("Example usage:")
        print("python ./.../prepare_snapshots.py Twitter16 sequential 3")
        exit()
    args = dict(zip(arg_names, sys.argv))
    dataset = args['dataset_name']
    dataset_type = args['dataset_type']
    snapshot_num = int(args['snapshot_num'])
    print_dict(args)

    # --------------------------
    #         INIT PATHS
    # --------------------------
    paths = {}
    if dataset in ['Twitter15', 'Twitter16']:
        pass
        # paths['resource_label'] = './resources/{0}/{0}_label_all.txt'.format(dataset)
        # paths['resource_tree'] = './resources/{0}/data.TD_RvNN.vol_5000.txt'.format(dataset)
        # paths['timestamps'] = './data/timestamps/{}/timestamps.txt'.format(dataset)
        # paths['snapshot_index'] = './data/timestamps/{}/{}_snapshots_{:02}.txt'.format(dataset, dataset_type, snapshot_num)
        # paths['graph'] = './data/graph/{0}/{1}_snapshot/'.format(dataset, dataset_type)
    elif dataset in ['Weibo']:
        # TODO: check if temporal ->

        paths['resource_label'] = './resources/{0}/weibo_id_label.txt'.format(dataset)
        paths['resource_tree'] = './resources/{0}/weibotree.txt'.format(dataset)
        paths['resource_tree_cache'] = './resources/{0}/weibotree_cache.json'.format(dataset)
        paths['timestamps'] = './data/timestamps/{}/timestamps.txt'.format(dataset)
        paths['sequential_snapshots'] = './data/timestamps/{}/sequential_snapshots_{:02}.txt'.format(dataset, snapshot_num)
        paths['snapshot_index'] = './data/timestamps/{}/{}_snapshots_{:02}.txt'.format(dataset, dataset_type, snapshot_num)
        paths['graph'] = './data/graph/{0}/{1}_snapshot/'.format(dataset, dataset_type)

    else:
        exit()

    # ----------------------------------
    #         GENERATE SNAPSHOTS
    # ----------------------------------

    id_label_dict, _ = load_labels(paths['resource_label'])

    # sequences_dict = load_json_file(paths['snapshot_index'])
    # trees_dict = load_snapshot_trees_weibo(paths, id_label_dict, sequences_dict, snapshot_num)
    # save_json_file(paths['resource_tree_cache'], trees_dict)
    trees_dict = load_json_file(paths['resource_tree_cache'])  # cache

    ensure_directory(paths['graph'])

    error_list = []
    for index, event_id in enumerate(id_label_dict.keys()):

        print(event_id)

        if event_id not in trees_dict:
            continue

        if event_id not in [
            '3501902090262385','3907580407356244', '3907742282069764', '3909081075061253',
            '3909155720971721', '3914408365363135', '3684095995971132', '3466379833885944',
            '3500947630475466', '3523166905046601', '3547825524904328']:
            continue


        if len(trees_dict[event_id]['0']) < 2:  # '0' after json load
            print("no responsive post", event_id, len(trees_dict[event_id][0]))
            continue

        for snapshot_index in range(snapshot_num):
            TweetTree(
                paths['graph'],
                event_id,
                id_label_dict[event_id],
                trees_dict[event_id][str(snapshot_index)],
                snapshot_index,
                snapshot_num,
            )

        """
        try:
            if len(trees_dict[event_id][0]) < 2:  # no responsive post
                print("no responsive post", event_id, len(trees_dict[event_id][0]))
                continue

            for snapshot_index in range(snapshot_num):
                TweetTree(
                    paths['graph'],
                    event_id,
                    id_label_dict[event_id],
                    trees_dict[event_id][snapshot_index],
                    snapshot_index,
                    snapshot_num,
                )
        except:
            error_list.append(event_id)
            # 11 ERRORS
            # ['3501902090262385', '3907580407356244', '3907742282069764', '3909081075061253', '3909155720971721', '3914408365363135', '3684095995971132', '3466379833885944', '3500947630475466', '3523166905046601', '3547825524904328']
        """


    print()
    print(error_list, len(error_list))
Example #24
def build_native_dir(directory, output_dir, cache_dir, abis, std_includes_path, rules: BaseConfig):
	executables = {}
	for abi in abis:
		executable = prepare_compiler_executable(abi)
		if executable is None:
			print("failed to acquire GCC executable from NDK for abi " + abi)
			return CODE_FAILED_NO_GCC
		executables[abi] = executable

	try:
		manifest = get_manifest(directory)
		targets = {}
		soname = "lib" + manifest["shared"]["name"] + ".so"
		for abi in abis:
			targets[abi] = os.path.join(output_dir, "so/" + abi + "/" + soname)
	except Exception as err:
		print("failed to read manifest for directory " + directory + " error: " + str(err))
		return CODE_FAILED_INVALID_MANIFEST

	keep_sources = rules.get_value("keepSources", fallback=False)
	if keep_sources:
		# copy everything and clear build files
		copy_directory(directory, output_dir, clear_dst=True)
		clear_directory(os.path.join(output_dir, "so"))
		os.remove(os.path.join(output_dir, soname))
	else:
		clear_directory(output_dir)

		# copy manifest
		copy_file(os.path.join(directory, "manifest"), os.path.join(output_dir, "manifest"))

		# copy includes
		keep_includes = rules.get_value("keepIncludes", fallback=True)
		for include_path in manifest["shared"]["include"]:
			src_include_path = os.path.join(directory, include_path)
			output_include_path = os.path.join(output_dir, include_path)
			if keep_includes:
				copy_directory(src_include_path, output_include_path, clear_dst=True)
			else:
				clear_directory(output_include_path)

	std_includes = []
	for std_includes_dir in os.listdir(std_includes_path):
		std_includes.append(os.path.abspath(os.path.join(std_includes_path, std_includes_dir)))

	# compile for every abi
	overall_result = CODE_OK
	for abi in abis:
		printed_compilation_title = f"compiling {os.path.basename(directory)} for {abi}"
		print("\n")
		print(f"{'=' * (48 - len(printed_compilation_title) // 2)} {printed_compilation_title} {'=' * (48 - (1 + len(printed_compilation_title)) // 2)}")

		executable = executables[abi]
		gcc = [executable, "-std=c++11"]
		includes = []
		for std_includes_dir in std_includes:
			includes.append(f'-I{std_includes_dir}')
		dependencies = [f'-L{get_fake_so_dir(abi)}', "-landroid", "-lm", "-llog"]
		for link in rules.get_value("link", fallback=[]) + make_config.get_value("make.linkNative", fallback=[]) + ["horizon"]:
			add_fake_so(executable, abi, link)
			dependencies.append(f'-l{link}')
		if "depends" in manifest:
			search_dir = os.path.abspath(os.path.join(directory, ".."))  # always search for dependencies in current dir
			for dependency in manifest["depends"]:
				if dependency is not None:
					add_fake_so(executable, abi, dependency)
					dependencies.append("-l" + dependency)
					dependency_dir = search_directory(search_dir, dependency)
					if dependency_dir is not None:
						try:
							for include_dir in get_manifest(dependency_dir)["shared"]["include"]:
								includes.append("-I" + os.path.join(dependency_dir, include_dir))
						except KeyError:
							pass
				else:
					print(f"ERROR: dependency directory {dependency} is not found, it will be skipped")

		# prepare directories
		source_files = get_all_files(directory, extensions=(".cpp", ".c"))
		preprocessed_dir = os.path.abspath(os.path.join(cache_dir, "preprocessed", abi))
		ensure_directory(preprocessed_dir)
		object_dir = os.path.abspath(os.path.join(cache_dir, "object", abi))
		ensure_directory(object_dir)

		# pre-process and compile changes
		import filecmp
		object_files = []
		recompiled_count = 0
		for file in source_files:
			relative_file = relative_path(directory, file)
			sys.stdout.write("preprocessing " + relative_file + " " * 64 + "\r")

			object_file = os.path.join(object_dir, relative_file) + ".o"
			preprocessed_file = os.path.join(preprocessed_dir, relative_file)
			tmp_preprocessed_file = preprocessed_file + ".tmp"
			ensure_file_dir(preprocessed_file)
			ensure_file_dir(object_file)
			object_files.append(object_file)

			result = subprocess.call(gcc + ["-E", file, "-o", tmp_preprocessed_file] + includes)
			if result == CODE_OK:
				if not os.path.isfile(preprocessed_file) or not os.path.isfile(object_file) or \
						not filecmp.cmp(preprocessed_file, tmp_preprocessed_file):
					if os.path.isfile(preprocessed_file):
						os.remove(preprocessed_file)
					os.rename(tmp_preprocessed_file, preprocessed_file)
					if os.path.isfile(object_file):
						os.remove(object_file)

					sys.stdout.write("compiling " + relative_file + " " * 64 + "\n")
					result = max(result, subprocess.call(gcc + ["-c", preprocessed_file, "-shared", "-o", object_file]))
					if result != CODE_OK:
						if os.path.isfile(object_file):
							os.remove(object_file)
						overall_result = result
					else:
						recompiled_count += 1
			else:
				if os.path.isfile(object_file):
					os.remove(object_file)
				overall_result = result

		print(" " * 128)
		if overall_result != CODE_OK:
			print("failed to compile", overall_result)
			return overall_result
		else:
			print(f"recompiled {recompiled_count}/{len(object_files)} files with result {overall_result}")

		ensure_file_dir(targets[abi])

		command = []
		command += gcc
		command += object_files
		command.append("-shared")
		command.append("-Wl,-soname=" + soname)
		command.append("-o")
		command.append(targets[abi])
		command += includes
		command += dependencies
		print("linking object files...")
		result = subprocess.call(command)
		if result == CODE_OK:
			print("build successful")
		else:
			print("linker failed with result code", result)
			overall_result = result
			return overall_result
	return overall_result
Example #25
def get_fake_so_dir(abi):
	fake_so_dir = make_config.get_path(os.path.join("toolchain/ndk/fakeso", abi))
	ensure_directory(fake_so_dir)
	return fake_so_dir
Example #26
def import_build_config(make_file, source, destination):
    global root_files
    root_files.append("build.config")

    build_config = os.path.join(source, "build.config")
    with open(build_config, "r", encoding="utf-8") as config_file:
        config_obj = json.loads(config_file.read())
        config = BaseConfig(config_obj)
        make_file["global"]["api"] = config.get_value("defaultConfig.api",
                                                      "CoreEngine")

        src_dir = os.path.join(destination, "src")

        # clear assets folder
        assets_dir = os.path.join(src_dir, "assets")
        clear_directory(assets_dir)
        os.makedirs(assets_dir)

        # some pre-defined resource folders
        resources = [{
            "path": "src/assets/resource_packs/*",
            "type": "minecraft_resource_pack"
        }, {
            "path": "src/assets/behavior_packs/*",
            "type": "minecraft_behavior_pack"
        }]

        os.makedirs(os.path.join(assets_dir, "resource_packs"))
        os.makedirs(os.path.join(assets_dir, "behavior_packs"))

        # import assets
        for res_dir in config.get_filtered_list("resources", "resourceType",
                                                ("resource", "gui")):
            if res_dir["resourceType"] == "resource":
                res_dir["resourceType"] = "resource_directory"
            path_stripped = res_dir["path"].strip('/')
            path_parts = path_stripped.split('/')
            path = os.path.join(*path_parts)
            copy_directory(os.path.join(source, path),
                           os.path.join(assets_dir, path), True)
            resources.append({
                "path": "src/assets/" + path_stripped,
                "type": res_dir["resourceType"]
            })

            root_files.append(path_parts[0])

        make_file["resources"] = resources

        # clear libraries folder and copy libraries from the old project
        libs_dir = os.path.join(destination, "src", "lib")
        clear_directory(libs_dir)
        clear_directory(os.path.join(destination, "src", "dev"))
        os.makedirs(libs_dir)
        old_libs = config.get_value("defaultConfig.libraryDir",
                                    "lib").strip('/')
        old_libs_parts = old_libs.split('/')
        old_libs_dir = os.path.join(source, *old_libs_parts)
        if os.path.isdir(old_libs_dir):
            root_files.append(old_libs_parts[0])
            copy_directory(old_libs_dir, libs_dir)

        # some pre-defined source folders
        sources = [{
            "source": "src/lib/*",
            "type": "library",
            "language": "javascript"
        }, {
            "source": "src/preloader/*",
            "type": "preloader",
            "language": "javascript"
        }]

        ensure_directory(os.path.join(src_dir, "preloader"))

        # import sources
        for source_dir in config.get_filtered_list("compile", "sourceType",
                                                   ("mod", "launcher")):
            if source_dir["sourceType"] == "mod":
                source_dir["sourceType"] = "main"

            sourceObj = {
                "type": source_dir["sourceType"],
                "language": "javascript"
            }

            source_parts = source_dir["path"].split('/')
            root_files.append(source_parts[0])

            build_dirs = config.get_filtered_list("buildDirs", "targetSource",
                                                  (source_dir["path"]))
            if (len(build_dirs) > 0):
                old_build_path = build_dirs[0]["dir"].strip("/")
                old_path_parts = old_build_path.split('/')
                sourceObj["source"] = "src/" + old_build_path
                sourceObj["target"] = source_dir["path"]
                root_files.append(old_path_parts[0])

                copy_directory(os.path.join(source, *old_path_parts),
                               os.path.join(src_dir, *old_path_parts), True)

            else:
                sourceObj["source"] = "src/" + source_dir["path"]
                copy_file(os.path.join(source, *source_parts),
                          os.path.join(src_dir, *source_parts))

            sources.append(sourceObj)

        make_file["sources"] = sources
        return
    exit("unable to read build.config")
Example #27
    def startAnalysis(self):
        self.stopAnalysis()  # ensure no analysis is already running
        # TODO: return if an analysis is already running instead of restarting a new analysis

        if self.videodata is None:  # no video loaded, return gracefully
            return

        if self.solver is not None:  # remove the arrows
            self.solver.clear_annotations()

        self.setPlotOptions()
        self.saveParameters()

        self.output_basepath = utils.ensure_directory(self.fileName, "results")

        if self.checkBox_export.isChecked():
            write_target = utils.ensure_directory(self.fileName, "exports")
        else:
            write_target = None

        print("Tracking: %s." % (self.checkBox_track.isChecked()))
        self.solver = Solver(
            videodata=self.videodata,
            fps=float(self.lineEdit_fps.text()),
            box_dict=self.boxes_dict,
            upsample_factor=int(self.lineEdit_sub_pix.text()),
            stop_frame=int(self.lineEdit_stop_frame.text()),
            start_frame=int(self.lineEdit_start_frame.text()),
            res=float(self.lineEdit_pix_size.text()),
            track=self.checkBox_track.isChecked(),
            compare_first=self.checkBox_compare_first.isChecked(),
            filter=self.checkBox_filter.isChecked(),
            windowing=self.checkBox_windowing.isChecked(),
            matlab=self.checkBox_matlab.isChecked(),
            figure=self.figure,
            write_target=write_target)

        self.figure.savefig("%s_overview.png" % (self.output_basepath))

        combined = skimage.color.gray2rgb(
            self.videodata.get_frame(int(self.lineEdit_start_frame.text())))
        for box in self.boxes_dict:
            rect = box.rect

            top_left = (int(rect.get_x()),
                        int(rect.get_y() + rect.get_height()))
            bottom_right = (int(rect.get_x() + rect.get_width()),
                            int(rect.get_y()))

            combined = cv2.rectangle(combined, top_left, bottom_right,
                                     (255, 0, 0), 1)

        skimage.io.imsave("%s_raw.png" % (self.output_basepath), combined)

        self.solver.progressChanged.connect(self.updateProgress)
        self.solver.start()

        self.timer = QTimer()
        self.timer.timeout.connect(self.updateProgress)
        self.timer.start(100)
        print("Started timer.")
Example #28
def process_list_element(source_dir, data_dir, n, fname):
    print("%07d  %s" % (n, fname))
    nxml_file = os.path.join(source_dir, fname)
    jsn_file = os.path.join(data_dir, 'jsn', fname)
    ensure_directory(jsn_file)
    create_jsn_file(nxml_file, jsn_file)
Example #29
# -------------------------------
#         Validate Inputs
# -------------------------------
assert model in ['GCN', "PrintAttention"]
assert learning_sequence in ['additive', 'dot_product', 'mean']
assert dataset_name in ['Twitter15', 'Twitter16', 'Weibo']
assert dataset_type in ['sequential', 'temporal']
assert snapshot_num in [2, 3, 5]

# --------------------------
#         INIT PATHS
# --------------------------
path_info = [
    model, dataset_name, dataset_type, learning_sequence, snapshot_num, current
]
ensure_directory("./results/")
RESULTS_FILE = "./results/{0}_{1}_{2}_{3}_{4}_{5}_results.txt".format(
    *path_info)
FOLDS_FILE = "./results/{0}_{1}_{2}_{3}_{4}_{5}_folds.json".format(*path_info)
MODEL_PATH = "./results/{0}_{1}_{2}_{3}_{4}_{5}_model.pt".format(*path_info)
LABEL_PATH = './resources/{0}/{0}_label_all.txt'.format(dataset_name)
TREE_PATH = './resources/{0}/data.TD_RvNN.vol_5000.txt'.format(dataset_name)

if dataset_name == 'Weibo':
    LABEL_PATH = './resources/{0}/weibo_id_label.txt'.format(dataset_name)
    TREE_PATH = './resources/{0}/weibotree.txt'.format(dataset_name)

# -------------------------------
#         Hyperparameters
# -------------------------------
iterations = 10