Python is_indexable_file 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: filetype_utils

메소드/함수: is_indexable_file

hotexamples.com에서의 예제들: 5

Python is_indexable_file - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 filetype_utils.is_indexable_file에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: b_dir_src.py 프로젝트: mpi-sws-rse/datablox

 def indexable_file(self, path):
   """Decide whether the file at `path` should be sent for indexing.

   When `self.only_index` is truthy, the path qualifies only if it ends with
   one of the suffixes listed there. Otherwise the decision is delegated to
   filetype_utils.is_indexable_file. Data is only sent when this is True.
   """
   if not self.only_index:
     return filetype_utils.is_indexable_file(path)
   # An explicit suffix list overrides the generic file-type detection.
   return any(path.endswith(suffix) for suffix in self.only_index)

예제 #2

파일 보기

파일: test_filetypes.py 프로젝트: mpi-sws-rse/datablox

 def _tc(self, path, filetype, category, indexable):
     """Run a single testcase against filetype_utils for `path`.

     Checks that the description and category reported for `path` equal
     `filetype` and `category`, runs self._check_category on the reported
     values, and finally checks that is_indexable_file agrees with
     `indexable`.
     """
     found_type, found_category = \
         filetype_utils.get_file_description_and_category(path)

     type_msg = "Expecting filetype %s for %s, got %s" % (
         filetype, path, found_type)
     self.assertEqual(found_type, filetype, type_msg)

     category_msg = "Expecting category %s for %s, got %s" % (
         category, path, found_category)
     self.assertEqual(found_category, category, category_msg)

     self._check_category(path, found_type, found_category)

     found_indexable = filetype_utils.is_indexable_file(path)
     # An empty prefix reads as "indexable", "not " as "not indexable".
     found_prefix = "" if found_indexable else "not "
     wanted_prefix = "" if indexable else "not "
     indexable_msg = "Path %s is %sindexable, expecting %sindexable" % (
         path, found_prefix, wanted_prefix)
     self.assertEqual(found_indexable, indexable, indexable_msg)

예제 #3

파일 보기

파일: b_filename_categorizer.py 프로젝트: mpi-sws-rse/datablox

 def include(record):
   """Return True if `record` should be kept.

   A record carrying a "token" key is always kept. Any other record is kept
   only when filetype_utils.is_indexable_file accepts its "path" value after
   that value has been passed through p2f.
   """
   # `in` replaces dict.has_key(), which no longer exists in Python 3 and
   # behaves identically for dictionaries in Python 2.
   if "token" in record:
     return True
   return filetype_utils.is_indexable_file(p2f(record["path"]))

예제 #4

파일 보기

파일: b_filename_categorizer.py 프로젝트: mpi-sws-rse/datablox

 def indexable_file(self, path):
   """Report whether the file at `path` can be indexed.

   The answer comes straight from filetype_utils.is_indexable_file (e.g. the
   file contains text content). Data is only sent when this is True.
   """
   can_index = filetype_utils.is_indexable_file(path)
   return can_index

예제 #5

파일 보기

파일: test_file_crawl.py 프로젝트: mpi-sws-rse/datablox

def crawl_files(directory_to_crawl):
    """Crawl a directory tree and write per-file and aggregate CSV reports.

    Every file found under `directory_to_crawl` (after `~` expansion) is sized
    with os.stat and classified through filetype_utils (description, category,
    indexability and tags). Once the whole tree has been walked, two files are
    written relative to the current working directory:

    * ./file_data.csv - a header row followed by one row per file.
    * ./aggregate_data.csv - total count and size, plus counts and sizes
      grouped by category, by type and by tag.

    Only the first tag of a file is used. Files without tags are shown as
    "None" in the per-file report and counted as "untagged" in the aggregates.

    An AssertionError is raised if the expanded path is not a directory.
    """
    path = os.path.abspath(os.path.expanduser(directory_to_crawl))
    assert os.path.isdir(path)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Crawling directory %s" % path)
    total_size = 0
    total_cnt = 0
    size_by_category = {}
    cnt_by_category = {}
    size_by_type = {}
    cnt_by_type = {}
    size_by_tag = {}
    cnt_by_tag = {}

    entries = [["Path", "Size", "Category", "Type", "Indexable?", "Tags"]]
    for root, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            fpath = os.path.join(root, filename)
            filesize = os.stat(fpath).st_size
            total_size += filesize
            total_cnt += 1

            (filetype, category) = \
                filetype_utils.get_file_description_and_category(fpath)
            add_to_dict(size_by_category, category, filesize)
            add_to_dict(cnt_by_category, category, 1)
            add_to_dict(size_by_type, filetype, filesize)
            add_to_dict(cnt_by_type, filetype, 1)

            indexable = "Yes" if filetype_utils.is_indexable_file(fpath) else "No"

            tags = filetype_utils.get_tags(fpath)
            if tags:
                tag_label = tag_key = tags[0]
            else:
                # The per-file report and the aggregates use different
                # placeholders for files that have no tags.
                tag_label = "None"
                tag_key = "untagged"
            add_to_dict(size_by_tag, tag_key, filesize)
            add_to_dict(cnt_by_tag, tag_key, 1)

            entries.append([fpath, str(filesize), category, filetype,
                            indexable, tag_label])

    # Reporting happens once, after the walk: doing it per directory rewrote
    # both files on every iteration. The total is no longer divided by the
    # file count, which reported an average and failed on a count of zero.
    print("Crawled %d files, for %3.2f MB total" % (total_cnt,
                                                    total_size / 1000000.0))

    datafile = os.path.abspath("./file_data.csv")
    with open(datafile, "w") as fd:
        fd.writelines(", ".join(entry) + "\n" for entry in entries)
    print("Wrote data to file %s" % datafile)

    aggfile = os.path.abspath("./aggregate_data.csv")
    with open(aggfile, "w") as fa:
        fa.write("Group, Subgroup, Value\n")
        fa.write("Total, Count, %d\n" % total_cnt)
        fa.write("Total, Size, %d\n" % total_size)
        dict_to_csv(fa, "Cnt by Category", cnt_by_category)
        dict_to_csv(fa, "Size by Category", size_by_category)
        dict_to_csv(fa, "Cnt by Type", cnt_by_type)
        dict_to_csv(fa, "Size by Type", size_by_type)
        dict_to_csv(fa, "Cnt by Tag", cnt_by_tag)
        dict_to_csv(fa, "Size by Tag", size_by_tag)
    print("Wrote aggregates to file %s" % aggfile)