Example #1
def parse_file(file):
    fname = bookname(file)
    document = new_doc(fname, 'lxx')
    chapter = 0
    verse = 0

    # Lines alternate between a chapter/verse header and the verse text itself.
    chapter_line = True
    for line in read_file(file):
        if should_break_greek(line):
            break

        if not line.strip():  # stop at the first blank line
            break

        if chapter_line:
            try:
                chapter, verse = is_greek_chapter(line)
            except (TypeError, ValueError):
                print(line)  # log header lines that fail to parse
            document = add_chapter(document, chapter)
            chapter_line = False
            continue

        for text in split_greek_verses(line):
            add_verse(document, verse, text)
            verse += 1

        chapter_line = True

    return document, fname
Example #2
def read_cached_info(inloc):
    # reads the cached raw semmeddb info
    # and returns a set of unique tuples

    semmed_tuples = set()
    name = dict()
    identifiers = defaultdict(list)

    for line in read_file("semmed_raw_info.txt", inloc):
        vals = line.split('#')

        # obj CUIs are all of the form C1234567 (no gene ids)
        sub_ids = vals[3].split('|')
        sub_names = vals[4].split('|')

        assert check.is_cui(vals[6]), "not a cui! PID {0} s_cui {1}".format(
            vals[0], vals[6])

        semmed_tuples |= set([(val, vals[6]) for val in sub_ids])

        for val in sub_ids:
            identifiers[(val, vals[6])].append(
                (vals[5], vals[0], vals[1], vals[2]))

        name[vals[6]] = vals[7]
        for sub, s_name in zip(sub_ids, sub_names):
            name[sub] = s_name

    return (semmed_tuples, name, identifiers)
Example #3
def generate_mix_features(source_dir_path,
                          target_dir_path,
                          params_list,
                          group_size=300,
                          bin_number=10):
    # print "script: extract_feature.py,  lineNumber:", sys._getframe().f_lineno, ",  func:", sys._getframe().f_code.co_name
    for root, _, files in os.walk(source_dir_path):
        for file in files:
            data = file_util.read_file(os.path.join(root, file))
            # #---------------test function-------------------------
            # denoised_data = data_process.denoise(data, settings.THRESHOLDS)

            # file_util.write_file('denoised'+'\\'+file, denoised_data)
            fp = extract_feature.get_feature_from_matrix(
                data, group_size, bin_number)
            # fp = extract_feature.get_feature_from_matrix(denoised_data, group_size, bin_number)
            fp_dict = dict()
            for i, row in enumerate(fp):
                row = data_process.normalize(row)  # normalize each feature row
                fp_dict[i] = row

            mix_fp_temp = list_util.merge(fp_dict[0], fp_dict[1])
            mix_fp = list_util.merge(mix_fp_temp, fp_dict[2])
            target_file = os.path.join(target_dir_path,
                                       file[:-4] + "_" + ".txt")
            file_util.write_file(target_file, mix_fp)
Example #4
def read_omim_names():
    dmim_name = dict()
    inloc = os.path.join(HOME, "databases/omim/data")
    for line in read_file("dmim_name.txt", inloc):
        dmim, name = line.split('|')
        dmim_name[dmim] = name

    return dmim_name
Example #5
def get_text_semtypes():
    text_semtypes = dict()
    loc = "/home/toby/global_util/semmeddb/data/"
    for line in islice(read_file("text_semtypes.txt", loc), 1, None):
        vals = line.split('|')
        text = vals[0]
        semtypes = vals[1:]

        text_semtypes[text] = set(semtypes)

    return text_semtypes
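The loader above (like the similar ones in Examples #10 and #11) skips a header row with islice and splits pipe-delimited records. A minimal self-contained sketch of that pattern, using made-up sample lines in place of the real data file:

from itertools import islice

# Hypothetical stand-in lines for text_semtypes.txt; the real file is read
# with the project's read_file helper instead.
sample = ["text|semtypes", "aspirin|orch|phsu"]

text_semtypes = dict()
for line in islice(sample, 1, None):  # islice(..., 1, None) skips the header row
    vals = line.split('|')
    text_semtypes[vals[0]] = set(vals[1:])

print(text_semtypes)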
Example #6
def get_pred(path):
    lines = file_util.read_file(path).split('\n')
    bboxes = []
    for line in lines:
        if line == '':
            continue
        bbox = line.split(',')
        if len(bbox) % 2 == 1:
            print(path)
        bbox = [int(x) for x in bbox]
        bboxes.append(bbox)
    return bboxes
Example #7
def main():
    loc = "/home/toby/implicitome/orig_data/"
    fname = "matchscores.txt-coOcc-no-NaN.txt"
    print "working"
    with open("explicit_links.txt", "w") as explicit:
        with open("implicit_links.txt", "w") as implicit:
            for line in read_file(fname, loc):
                vals = line.split(',', 3)
                assert len(vals) == 4, "{0}".format("|".join(vals))
                if vals[3] == "[]":  # implicit
                    implicit.write("{0}\n".format("|".join(vals[:-1])))
                else:  # explicit
                    explicit.write("{0}\n".format("|".join(vals[:-1])))
Example #8
def get_semmed_tuples():
    """Returns all semmeddb tuples"""
    semmed_tuples = set()
    loc = os.path.join(HOME, "global_util/semmeddb/data")
    for line in read_file("uniq_pred_agg.txt", loc):
        sub, s_type, pred, obj, o_type = line.split('\t')

        s_cuis = sub.split('|')
        o_cuis = obj.split('|')

        semmed_tuples |= set([(s, o) for s in s_cuis for o in o_cuis])

    return semmed_tuples
Example #9
def read_cached_tuples(fname):
    """
    Reads a cached implicitome block of information.
    Assumes that the block exists. (The existence check
    is performed by get_raw_tuples().)
    """
    logging.debug("Reading from cache")
    raw_tuples = set()
    for line in read_file(fname):
        tuple_id, sub_id, obj_id = line.split('|')
        raw_tuples.add((tuple_id, sub_id, obj_id))

    return raw_tuples
Example #10
def get_text_to_cuis():
    """
    Returns a dictionary of text to CUI mappings.
    """
    text_to_cuis = dict()
    loc = "/home/toby/global_util/semmeddb/data/"
    for line in islice(read_file("text_to_cuis.txt", loc), 1, None):
        vals = line.split('|')

        text = vals[0]
        cuis = vals[1:]
        text_to_cuis[text] = set(cuis)

    return text_to_cuis
Example #11
def get_cui_semtypes():
    """
    Returns the unique semantic types of a CUI as a dict of sets.
    """
    cui_semtypes = dict()
    loc = "/home/toby/global_util/semmeddb/data/"
    for line in islice(read_file("cui_semtypes.txt", loc), 1, None):
        vals = line.split('|')
        cui = vals[0]
        semtypes = vals[1:]

        cui_semtypes[cui] = set(semtypes)

    return cui_semtypes
Example #12
def create(arg_hash):
    # Read in the raw strings from the contents of the files.
    input_dir = arg_hash['in_dir']
    out_dir = arg_hash['out_dir']

    if arg_hash['clean']:
        print 'cleaning'
        initr.clean_dirs(input_dir, out_dir)
        if not arg_hash['test']:
            sys.exit(1)

    options = initr.init_or_read_opts(input_dir, out_dir,
                                      arg_hash['clean_init'])

    raw_contents = []
    if os.path.isdir(input_dir):
        raw_contents = file_util.read_files(input_dir)
    else:
        raw_contents = [file_util.read_file(input_dir)]

    if raw_contents == []:
        print "Couldn't find any blog file(s) in: %s" % input_dir
        sys.exit(0)

    # Build the blog parser so we can parse the raw strings
    blog_parser = parser.buildParser("yaml", options)

    # Parse the YAML files into a blog object
    blog = blog_parser.parse(raw_contents)

    # Sort the posts by date
    # TODO(josh): Should this be internal to the blog?
    blog.sort_posts()

    # Create the links
    blog.create_links()

    # Display the AST (for the curious)
    # print blog.display_ast()

    # Generate the JSON representation
    json_out = "var pyrite_data = " + (blog.generate_json().strip()) + ";"
    # print json_out

    # Create the necessary pyrite directories
    json_path = os.path.join(out_dir, psettings.JS_DIR, psettings.DATA_FILE)
    file_util.write_file(json_path, json_out)
Example #13
def create(arg_hash):
  # Read in the raw strings from the contents of the files. 
  input_dir = arg_hash['in_dir']
  out_dir = arg_hash['out_dir']

  if arg_hash['clean']: 
    print 'cleaning'
    initr.clean_dirs(input_dir, out_dir) 
    if not arg_hash['test']:
      sys.exit(1) 

  options = initr.init_or_read_opts(input_dir, out_dir,
    arg_hash['clean_init'])

  raw_contents = []
  if os.path.isdir(input_dir):
    raw_contents = file_util.read_files(input_dir) 
  else:
    raw_contents = [ file_util.read_file(input_dir) ]

  if raw_contents == []:
    print "Couldn't find any blog file(s) in: %s" % input_dir
    sys.exit(0)

  # Build the blog parser so we can parse the raw strings
  blog_parser = parser.buildParser("yaml", options)

  # Parse the YAML files into a blog object
  blog = blog_parser.parse(raw_contents)

  # Sort the posts by date
  # TODO(josh): Should this be internal to the blog? 
  blog.sort_posts()

  # Create the links 
  blog.create_links()

  # Display the AST (for the curious)
  # print blog.display_ast()

  # Generate the JSON representation    
  json_out = "var pyrite_data = " + (blog.generate_json().strip()) + ";" 
  # print json_out

  # Create the necessary pyrite directories 
  json_path = os.path.join(out_dir, psettings.JS_DIR, psettings.DATA_FILE)
  file_util.write_file(json_path, json_out)
Example #14
def all_links_of_type(link_type):
    """
    Returns all the gene-disease links of a certain kind (explicit
    or implicit).
    """
    assert link_type in ["implicit",
                         "explicit"], "bad choice for implicitome links"
    loc = "/home/toby/databases/implicitome/"
    fname = "{0}_links.txt".format(link_type)

    sub_ids, obj_ids = read_ids()
    for line in read_file(fname, loc):
        sub, obj, score = line.split("|")
        if "EG" in sub_ids[sub] and "UMLS" in obj_ids[obj]:
            gene_ids = sub_ids[sub]["EG"]
            cuis = obj_ids[obj]["UMLS"]
            yield ((sub, obj), gene_ids, cuis)
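Since all_links_of_type is a generator, callers can stream the links without holding them all in memory. A hypothetical usage sketch (it assumes the implicitome data files and read_ids() are available):

# Hypothetical usage of the generator above.
for (sub, obj), gene_ids, cuis in all_links_of_type("implicit"):
    print("{0}|{1}: genes={2} cuis={3}".format(
        sub, obj, sorted(gene_ids), sorted(cuis)))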
Example #15
def plot_re(filename, threshold):
    result = file_util.read_file(filename)
    p = list_util.get_column_from_matrix(result, 0)
    r = list_util.get_column_from_matrix(result, 1)
    f = list_util.get_column_from_matrix(result, 2)

    indices = [i for i in range(len(f)) if f[i] > threshold]

    new_p = [p[i] for i in indices]
    new_r = [r[i] for i in indices]
    new_f = [f[i] for i in indices]

    # plt.axis([0, 50, 0, 1])
    # plt.plot(new_r[:50], new_p[:50],color='steelblue', marker='*', label='precision')
    # plt.plot(range(50), new_r[:50],color='darkseagreen', marker='*', label='recall')
    # plt.plot(range(50), new_f[:50],color='salmon', marker='*', label='F-measure')
    plt.show()
Example #16
def get_gt(path):
    lines = file_util.read_file(path).split('\n')
    bboxes = []
    for line in lines:
        if line == '':
            continue
        # line = util.str.remove_all(line, '\xef\xbb\xbf')
        # gt = util.str.split(line, ',')
        gt = line.split(',')

        # np.int was removed from modern NumPy; plain int() is equivalent here
        x1 = int(gt[0])
        y1 = int(gt[1])

        bbox = [int(gt[i]) for i in range(4, 32)]
        bbox = np.asarray(bbox) + ([x1, y1] * 14)

        bboxes.append(bbox)
    return bboxes
Example #17
def read_readme_file(file, args):
  """ Read a README.txt and try to parse its containing version numbers """
  lines = read_file(file).split("\n")
  for line in lines:
    parts = line.split(':', 1)
    if len(parts) != 2:
      continue
    if parts[0].startswith('CEF Version'):
      args['CEF_VER'] = parts[1].strip()
      verparts = args['CEF_VER'].split('.')
      if len(verparts) >= 2:
        args['CEF_MAJOR'] = verparts[0]
        args['CEF_BUILD'] = verparts[1]
    elif parts[0].startswith('CEF URL'):
      args['CEF_URL'] = parts[1].strip()
    elif parts[0].startswith('Chromium Version'):
      args['CHROMIUM_VER'] = parts[1].strip()
    elif parts[0].startswith('Chromium URL'):
      args['CHROMIUM_URL'] = parts[1].strip()
Example #18
def read_readme_file(file, args):
    """ Read a README.txt and try to parse its containing version numbers """
    lines = read_file(file).split("\n")
    for line in lines:
        parts = line.split(':', 1)
        if len(parts) != 2:
            continue
        if parts[0].startswith('CEF Version'):
            args['CEF_VER'] = parts[1].strip()
            verparts = args['CEF_VER'].split('.')
            if len(verparts) >= 2:
                args['CEF_MAJOR'] = verparts[0]
                args['CEF_BUILD'] = verparts[1]
        elif parts[0].startswith('CEF URL'):
            args['CEF_URL'] = parts[1].strip()
        elif parts[0].startswith('Chromium Version'):
            args['CHROMIUM_VER'] = parts[1].strip()
        elif parts[0].startswith('Chromium URL'):
            args['CHROMIUM_URL'] = parts[1].strip()
Example #19
def parse_file(file):
    fname = bookname(file)
    document = new_doc(fname, 'mt')
    chapter = 0
    verse = 0
    for line in read_file(file):
        m = is_hebrew_chapter(line)
        if m is not None:
            chapter = m
            document = add_chapter(document, chapter)
            verse = 0
            continue

        if should_skip_hebrew(line):
            continue

        verse += 1
        text_line = hebrew_chars(line)
        document = add_verse(document, verse, text_line)

    return document, fname
Example #20
def main():
    for line in read_file("a.txt"):
        pid, sid = line.split('|')
        sub, pred, obj = find_locations(pid, sid)
        sent = get_sentence(sid)

        print highlight(sent, sub, pred, obj)
        # work(sent, sub, pred, obj)
        raw_input("Press Enter to continue...")

    return

    # unreachable debug scaffolding below
    print "hi"
    pid = "539852"
    sid = "3666031"
    a = find_locations(pid, sid)
    b = get_sentence(sid)

    print b
    print a
    highlight(b, a)
Example #21
def parse_morbidmap():
    """
    Returns the set of unique genes associated with a particular
    OMIM disease in the format:
        genes[dmim] = set(gmim, gmim, gmim...)
    """
    genes = defaultdict(set)  # all unique gmims associated with a dmim
    name = dict()  # name of dmim
    loc = os.path.join(HOME, "databases/omim/data")
    for line in read_file("morbidmap.txt", loc):
        disease, gene, gmim, locus = line.split("|")

        res = info(disease)
        if res:
            genes[res[0]].add(gmim)
            name[res[0]] = res[1]

    with open(os.path.join(loc, "dmim_name.txt"), "w") as out:
        for dmim, disease_name in name.items():
            out.write("{0}|{1}\n".format(dmim, disease_name))

    return genes
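A hypothetical usage sketch for the mapping returned above, counting genes per OMIM disease (it assumes the morbidmap.txt data file is in place):

# Hypothetical usage of parse_morbidmap().
genes = parse_morbidmap()
for dmim, gmims in sorted(genes.items()):
    print("{0}: {1} associated gene(s)".format(dmim, len(gmims)))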
Example #22
def i_am_buster(source_dir):
    # Error recovery: if translation dies halfway through, resume by writing a
    # marker comment at the top of each finished file; any file that already
    # starts with that marker is skipped.
    # TODO: too many files; translate with multiple processes
    # (Python multithreading seems useless for this).
    """

    :param source_dir: project directory
    :return:
    """
    files = scan_directory(source_dir)
    for file_name in files:
        # strip source_dir so the display is cleaner
        short_file_name = file_name[len(source_dir):]
        lines = file_util.read_file(file_name)
        if len(lines) != 0 and lines[0].startswith(head):
            print("Skipping translation of", short_file_name)
            continue
        print("Starting translation: " + short_file_name)
        trans_result = trans(lines)
        trans_result = head + trans_result
        file_util.write_back(trans_result, file_name)
        print("Finished translating " + short_file_name)
        time.sleep(1)
Example #23
def read_readme_file(file, args):
    """ Read a README.txt and try to parse its containing version numbers """
    lines = read_file(file).split("\n")
    for line in lines:
        parts = line.split(':', 1)
        if len(parts) != 2:
            continue
        if parts[0].startswith('CEF Version'):
            args['CEF_VER'] = parts[1].strip()
            subparts = parts[1].split('+')
            if len(subparts) != 3:
                raise Exception('Failed to parse CEF Version: %s' % parts[1])
            verparts = subparts[0].strip().split('.')
            if len(verparts) >= 3:
                args['CEF_MAJOR'] = verparts[0]
                args['CEF_MINOR'] = verparts[1]
                args['CEF_PATCH'] = verparts[2]
        elif parts[0].startswith('CEF URL'):
            args['CEF_URL'] = parts[1].strip()
        elif parts[0].startswith('Chromium Version'):
            args['CHROMIUM_VER'] = parts[1].strip()
        elif parts[0].startswith('Chromium URL'):
            args['CHROMIUM_URL'] = parts[1].strip()
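Example #23 expects the 'CEF Version' field to contain three '+'-separated parts, the first being a dotted major.minor.patch version. A standalone sketch of that parsing step with an illustrative, made-up version string:

# Illustrative README line; the version values are made up.
line = "CEF Version:    74.1.16+ge20b240+chromium-74.0.3729.131"
parts = line.split(':', 1)
subparts = parts[1].split('+')            # three parts expected
verparts = subparts[0].strip().split('.')
print("major={0} minor={1} patch={2}".format(
    verparts[0], verparts[1], verparts[2]))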
Example #24
def get_gt(path):
    lines = file_util.read_file(path).split('\n')
    bboxes = []
    tags = []
    for line in lines:
        if line == '':
            continue
        # line = util.str.remove_all(line, '\xef\xbb\xbf')
        # gt = util.str.split(line, ' ')
        gt = line.split(' ')

        # np.float/np.int were removed from modern NumPy; builtins are equivalent
        w_ = float(gt[4])
        h_ = float(gt[5])
        x1 = float(gt[2]) + w_ / 2.0
        y1 = float(gt[3]) + h_ / 2.0
        theta = float(gt[6]) / math.pi * 180

        bbox = cv2.boxPoints(((x1, y1), (w_, h_), theta))
        bbox = bbox.reshape(-1)

        bboxes.append(bbox)
        tags.append(int(gt[1]))
    return np.array(bboxes), tags
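Example #24 turns (center, size, angle) ground truth into four corner points with cv2.boxPoints. A minimal standalone check of that call (requires OpenCV; the box values are made up):

import cv2

# Hypothetical rotated box: center (50, 40), size 20x10, rotated 30 degrees.
corners = cv2.boxPoints(((50.0, 40.0), (20.0, 10.0), 30.0))
print(corners.reshape(-1))  # eight values: x1, y1, ..., x4, y4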
Example #25
def update_file(filename):
  oldcontents = read_file(filename)
  if len(oldcontents) == 0:
    msg(filename, "empty")
    return

  if os.path.splitext(filename)[1] == ".py":
    # Format Python files using YAPF.
    newcontents = yapf_format(filename, oldcontents)
  else:
    # Format C/C++/ObjC/Java files using clang-format.
    newcontents = clang_format(filename, oldcontents)

  if newcontents is None:
    raise Exception("Failed to process %s" % filename)

  if newcontents != oldcontents:
    msg(filename, "fixed")
    global updatect
    updatect += 1
    write_file(filename, newcontents)
  else:
    msg(filename, "ok")
  return
Example #26
def buster_english(source_dir):
    # TODO: too many files; translate with multiple processes
    # (Python multithreading seems useless for this).
    """
    Scan all the files under source_dir and translate those source files.
    :param source_dir: project directory
    :return:
    """
    # scan all files under the directory
    files = scan_directory(source_dir, source_format={'java', 'kt'})
    progress_iter = tqdm(files)
    for file_name in progress_iter:
        # get the path relative to the project
        # (str.lstrip strips a character set, not a prefix, so slice instead)
        relative_file_path = file_name[len(source_dir):]
        # update the progress bar
        progress_iter.set_description(
            "Processing file {}".format(relative_file_path))
        # read the file
        lines = file_util.read_file(file_name)
        # if the file already starts with the header line, skip it
        if len(lines) != 0 and lines[0].startswith(first_line):
            continue
        # get the translation result and prepend the header line
        trans_result = first_line + resolve(lines)
        # write the file back
        file_util.write_back(trans_result, file_name)
Example #27
def main():
    # split up and count with respect to type
    print "counting"
    count = defaultdict(int)
    uniq_tuples = defaultdict(set)
    for line in read_file("wow.txt"):
        sub, s_type, pred, obj, o_type = line.split('\t')
        s_cuis = sub.split('|')
        o_cuis = obj.split('|')

        tuples = set([(s, o) for s in s_cuis for o in o_cuis])

        count[(s_type, pred, o_type)] += len(tuples)
        uniq_tuples[(s_type, pred, o_type)] |= tuples


    # try to intersect with omim and look for hits
    intersect = dict()
    i = 0
    for trip, tups in uniq_tuples.items():
        print i
        intersect[trip] = compare_with_omim(tups)
        i += 1

    print "caching"

    ans = [(v, num) for v, num in count.items()]
    ans = sorted(ans, key=lambda x: x[1], reverse=True)

    with open("triple_types.txt", "w") as out:
        for v, num in ans:
            s = v[0]
            p = v[1]
            o = v[2]
            out.write("{0}|{1}|{2}|{3}|{4}\n".format(s, p, o, num,
                                                     intersect[v]))
Example #28
def update_file(filename):
    oldcontents = read_file(filename)
    if len(oldcontents) == 0:
        msg(filename, "empty")
        return

    if os.path.splitext(filename)[1] == ".py":
        # Format Python files using YAPF.
        newcontents = yapf_format(filename, oldcontents)
    else:
        # Format C/C++/ObjC/Java files using clang-format.
        newcontents = clang_format(filename, oldcontents)

    if newcontents is None:
        raise Exception("Failed to process %s" % filename)

    if newcontents != oldcontents:
        msg(filename, "fixed")
        global updatect
        updatect += 1
        write_file(filename, newcontents)
    else:
        msg(filename, "ok")
    return
Example #29
def load_converted_morbidmap():
    """
    For the genes and diseases in OMIM morbidmap, returns
    the Entrez gene IDs and the UMLS CUIs that the information
    corresponds to.
    """
    dmim_cuis = dict()
    gene_ids = dict()
    inloc = os.path.join(HOME, "databases/omim/data")
    cur_dmim = ""
    for i, line in enumerate(read_file("converted_morbidmap.txt", inloc)):
        line = line.lstrip('\t')

        vals = line.split('|')
        if i % 2 == 0:  # dmim
            if len(vals) == 1:  # no cuis
                cur_dmim = ""
            else:
                cur_dmim = vals[0]
                dmim_cuis[cur_dmim] = set(vals[1:])
        elif cur_dmim:
            gene_ids[cur_dmim] = set(vals)

    return (dmim_cuis, gene_ids)
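load_converted_morbidmap assumes records arrive as alternating line pairs: a disease MIM with its CUIs, then the Entrez gene IDs for that disease. A sketch of that layout with made-up values:

# Hypothetical pair of lines from converted_morbidmap.txt.
sample = ["104300|C0002395|C0494463",  # dmim followed by its UMLS CUIs
          "348|8883"]                  # Entrez gene IDs for that dmim
dmim_cuis, gene_ids = dict(), dict()
cur_dmim = ""
for i, line in enumerate(sample):
    vals = line.split('|')
    if i % 2 == 0:  # dmim line; a single field means no CUIs were found
        cur_dmim = vals[0] if len(vals) > 1 else ""
        if cur_dmim:
            dmim_cuis[cur_dmim] = set(vals[1:])
    elif cur_dmim:
        gene_ids[cur_dmim] = set(vals)
print(dmim_cuis)
print(gene_ids)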
Example #30
          for line in result['out'].split('\n'):
            if line.find('FAILED') != -1:
              failed_lines.append(line.strip())
          warn('Failed to apply %s, fix manually and run with --resave' % \
               patch['name'])
          failed_patches[patch['name']] = failed_lines
          continue

        if options.restore:
          # Restore from backup if a backup exists.
          for patch_path in patch_paths:
            patch_path_abs = os.path.abspath(os.path.join(patch_root_abs, \
                                                          patch_path))
            backup_path_abs = patch_path_abs + backup_ext
            if os.path.exists(backup_path_abs):
              if read_file(patch_path_abs) == read_file(backup_path_abs):
                msg('Restoring backup of %s' % patch_path_abs)
                remove_file(patch_path_abs)
                move_file(backup_path_abs, patch_path_abs)
              else:
                msg('Discarding backup of %s' % patch_path_abs)
                remove_file(backup_path_abs)
                has_backup_changes = True
            else:
              msg('No backup of %s' % patch_path_abs)

    if (not options.revert and not options.reapply) or has_backup_changes:
      if len(options.add) > 0:
        # Add additional requested files to the patch.
        for patch_path in options.add:
          patch_path_abs = os.path.abspath(os.path.join(patch_root_abs, \
Example #31
def read_default_options():
  to_read = os.path.join(
      file_util.get_module_dir(),
      psettings.RES_DIR,
      psettings.DEFAULT_OPTS)
  return file_util.read_file(to_read)
Example #32
                    for line in result['out'].split('\n'):
                        if line.find('FAILED') != -1:
                            failed_lines.append(line.strip())
                    warn('Failed to apply %s, fix manually and run with --resave' % \
                         patch['name'])
                    failed_patches[patch['name']] = failed_lines
                    continue

                if options.restore:
                    # Restore from backup if a backup exists.
                    for patch_path in patch_paths:
                        patch_path_abs = os.path.abspath(os.path.join(patch_root_abs, \
                                                                      patch_path))
                        backup_path_abs = patch_path_abs + backup_ext
                        if os.path.exists(backup_path_abs):
                            if read_file(patch_path_abs) == read_file(
                                    backup_path_abs):
                                msg('Restoring backup of %s' % patch_path_abs)
                                remove_file(patch_path_abs)
                                move_file(backup_path_abs, patch_path_abs)
                            else:
                                msg('Discarding backup of %s' % patch_path_abs)
                                remove_file(backup_path_abs)
                                has_backup_changes = True
                        else:
                            msg('No backup of %s' % patch_path_abs)

        if (not options.revert and not options.reapply) or has_backup_changes:
            if len(options.add) > 0:
                # Add additional requested files to the patch.
                for patch_path in options.add:
Example #33
def main(stdscr, data=None):
	'''Main function, includes the input-output loop'''
	# Fire up the CLI and set everything up
	editwin = init_term(stdscr, data)
	# Initial cursor positions
	pos_h = 0
	pos_w = 0
	# If we're opening a file, add its content to the view
	if data:
		filename = data
		data = read_file(data)
		editwin.addstr(data)
		pos_h = len(data.split('\n'))
		pos_w = len(data) - pos_h
	else:
		filename = None
		data = ''
	stdscr.refresh()
	modified = False
	# Main editing loop
	while True:
		# Get input from user
		char = editwin.getkey()
		
		# CTRL^X
		if ord(char) == 24:
			if modified:
				ret = exit(stdscr, editwin, data, filename)
			else:
				ret = 0
			if ret == 0:
				curses.echo()
				curses.endwin()
				sys.exit(0)
			else:
				write_keys(stdscr)
				continue

		# Scrolling feature - SEMI-WORKING, NEEDS MORE WORK
		if pos_h >= curses.LINES - 7:
			stdscr.scroll()
			pos_h -= 1
			stdscr.refresh()
		if char == '\n':
			pos_h += 1
			pos_w = 0
			editwin.move(pos_h, 0)
			stdscr.refresh()
			continue

		# Backspace
		if ord(char) == 8 or ord(char) == 127:
			# If we are at the first char, we ignore the keypress since there's nothing to delete
			if pos_h == 0 and pos_w == 0:
				continue
			# Else, if we delete a line we need to find the position of the last character typed
			if pos_w == 0:
				pos_w = curses.COLS - 2
				while pos_w != 0:
					pos_w -= 1
					try:
						c = chr(editwin.inch(pos_h - 1, pos_w))
						if c != ' ':
							pos_w += 1
							break
					except Exception:
						continue
				pos_h -= 1
				editwin.move(pos_h, pos_w)
				editwin.refresh()
				continue
			# Else, just remove the char and move the cursor position
			else:
				editwin.delch(pos_h, pos_w - 1)
				pos_w -= 1
				stdscr.refresh()
				continue
		
		# Add received input to the data buffer and add it to the view
		data += char
		editwin.addch(pos_h, pos_w, char)

		# Deal with the size limits
		if pos_w == curses.COLS - 4:
			pos_h += 1
			pos_w = 0
			editwin.move(pos_h, 0)
			stdscr.refresh()
		else:
			pos_w += 1
		
		# Finally, refresh the view
		editwin.refresh()

		# We keep a boolean value to add the 'Modified' label when we make changes
		if not modified:
			modified = True
			modified_alert(stdscr)
Example #34
def denoise(matrix, thresholds):
    """Data denoise: filter out any sample with a value greater than its threshold.
       :parameter
       ----------
       matrix: two-dimensional list
       thresholds: list, one threshold per parameter (i.e. per column). A sample
                    is kept only if every one of its parameters is no greater
                    than its corresponding threshold.
       ----------
       example:
       matrix = [[1.0, 2.0, 3.0],
                [4.0, 2.0, 6.0], 
                [1.5, 2.9, 3.5], 
                [1.5, 3.1, 3.5]]
       >>denoise(matrix, [2.0, 3.0, 4.0])
       >>[[1.0, 2.0, 3.0], [1.5, 2.9, 3.5]]
       """

    # print "script: data_process.py,  lineNumber:", sys._getframe().f_lineno, ",  func:", sys._getframe().f_code.co_name
    def fn(row):
        for i, item in enumerate(row):
            if item > thresholds[i]:
                return False
        return True

    return filter(fn, matrix)


if __name__ == '__main__':
    import file_util
    data = file_util.read_file('jzp_phone.txt')
    denoised = denoise(data, )
Example #35
# last updated 2015-02-27 toby
"""
Maps each snippet of text to all of the CUIs it could represent.
"""

import sys
sys.path.append("/home/toby/global_util/")
from file_util import read_file
from collections import defaultdict
from itertools import islice

print "Reading data"
text_to_cui = defaultdict(set)
for line in islice(read_file("data/pred_concept_source_text.txt"), 1, None):
    pid, sid, sub, obj, sub_text, obj_text = line.split('\t')
    # some text fields are empty, probably due to errors, and so are ignored

    assert "|" not in sub_text, "| in {0}".format(sub_text)
    assert "|" not in obj_text, "| in {0}".format(obj_text)

    if sub_text:
        s_cuis = sub.split('|')
        text_to_cui[sub_text] |= set(s_cuis)

    if obj_text:
        o_cuis = obj.split('|')
        text_to_cui[obj_text] |= set(o_cuis)

print "Writing to file"
with open("./data/text_to_cuis.txt", "w") as out:
    out.write("text|cuis\n")
Example #36
                         area_query=area_query)
    logger.debug('XML query """%s"""', query)
    overpass_osm = overpass_xml(query, conflate_cache_filename=args.conflate_cache_filename)
    # for key in osm.nsrids:
    #     print key, osm.nsrids[key]

    # print len(osm), osm

    nbr_osms_filenames = args.osm_filename
    if args.osm_kommune:
        for kommune in args.osm_kommune:
            nbr_osms_filenames.extend(get_kommune(kommune))

    nbr_osms = []
    for filename in nbr_osms_filenames:
        xml = file_util.read_file(filename)
        nbr_osms.append(osmapis.OSMnsrid.from_xml(xml))

    if len(nbr_osms) == 0:
        print 'Warning: You need to supply either --osm_kommune and/or --osm_filename, see --help. Exiting...'
        exit(1)
    
    # Combine osm objects
    nbr_osm = osmapis.OSMnsrid()
    for o in nbr_osms:
        for item in o:
            nbr_osm.add(item)

    print 'Saving the combined nbr data as nbr.osm'
    nbr_osm.save('nbr.osm')
Example #37
#coding: utf-8
import matplotlib.pyplot as plt
import file_util, list_util

if __name__ == '__main__':
    # feature = "answer"
    rf_result = file_util.read_file("rf_result.txt")
    svm_result = file_util.read_file("svm_result.txt")
    knn_result = file_util.read_file("knn_result.txt")
    bayes_result = file_util.read_file("bayes_result.txt")

    rf_F = list_util.get_column_from_matrix(rf_result, 2)
    svm_F = list_util.get_column_from_matrix(svm_result, 2)
    knn_F = list_util.get_column_from_matrix(knn_result, 2)
    bayes_F = list_util.get_column_from_matrix(bayes_result, 2)

    import numpy as np

    a = np.array(bayes_F)
    print np.mean(a)
    # x = range(100, 550, 50)
    #
    # plt.figure(1)
    #
    # plt.axis([50, 650, 0, 1.2])
    # plt.plot(x, rf_F, marker='o', color='steelblue', label='RF')
    # plt.plot(x, svm_F, marker='*', color='darkseagreen', label='SVM')
    # plt.plot(x, knn_F, marker='^', color='orange', label='KNN')
    # plt.plot(x, bayes_F, marker='v', color='lightcoral', label='NBC')
    # # plt.legend(loc='upper center', ncol=4)
    # plt.legend()