Python ContentFileの例

プログラミング言語: Python

名前空間/パッケージ名: datasets.github.scrape_repos.proto.scrape_repos_pb2

メソッド/関数: ContentFile

hotexamples.comのコード掲載数: 4

Python ContentFile - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのdatasets.github.scrape_repos.proto.scrape_repos_pb2.ContentFileの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def IndexContentFiles(job: scrape_repos_pb2.ImportWorker) -> None:
  """Preprocess one file of a cloned repo and write the results to the index.

  The file named by job.abspath is run through the job's preprocessors. Every
  text the preprocessors return is wrapped in a ContentFile message and
  written to '<job.index_dir>/<hex sha256 of the UTF-8 text>.pbtxt'.

  Args:
    job: The import job. This function reads its abspath, clone_dir,
      all_files_relpaths, preprocessors, clone_from_url and index_dir fields.
  """
  # Drop the clone directory prefix and the single separator that follows it.
  prefix_len = len(str(job.clone_dir)) + 1
  relpath = job.abspath[prefix_len:]
  try:
    artifacts = preprocessors.Preprocess(
      pathlib.Path(job.clone_dir),
      relpath,
      job.all_files_relpaths,
      job.preprocessors,
    )
    for artifact_index, artifact in enumerate(artifacts):
      digest = hashlib.sha256(artifact.encode("utf-8")).digest()
      proto = scrape_repos_pb2.ContentFile(
        clone_from_url=job.clone_from_url,
        relpath=relpath,
        artifact_index=artifact_index,
        sha256=digest,
        charcount=len(artifact),
        # Number of newline-separated segments, i.e. newline count plus one.
        linecount=artifact.count("\n") + 1,
        text=artifact,
      )
      index_dir = pathlib.Path(job.index_dir)
      # The output file is named after the hex form of the stored digest.
      hex_name = binascii.hexlify(proto.sha256).decode("utf-8")
      pbutil.ToFile(proto, index_dir / (hex_name + ".pbtxt"))
  except UnicodeDecodeError:
    # Undecodable input is reported and skipped rather than aborting the job.
    app.Warning("Failed to decode %s", relpath)

コード例 #2

ファイルを表示

ファイル: github_repo.py プロジェクト: 50417/phd

 def ContentFiles(self) -> typing.Iterable[scrape_repos_pb2.ContentFile]:
   """Iterate over the content files stored in this repo's index.

   Returns:
     An empty list when IsIndexed() is false. Otherwise a lazy generator that
     parses each entry of index_dir, except the one named 'DONE.txt', into a
     ContentFile message.
   """
   if not self.IsIndexed():
     return []
   return (
       pbutil.FromFile(entry, scrape_repos_pb2.ContentFile())
       for entry in self.index_dir.iterdir()
       if entry.name != 'DONE.txt'
   )

コード例 #3

ファイルを表示

ファイル: contentfiles.py プロジェクト: tehranixyz/ProGraML

  def ToProto(self) -> scrape_repos_pb2.ContentFile:
    """Build a new ContentFile message from this object.

    A fresh, empty message is created and handed to SetProto().

    Returns:
      Whatever SetProto() returns for the new message.
    """
    message = scrape_repos_pb2.ContentFile()
    return self.SetProto(message)

コード例 #4

ファイルを表示

ファイル: export_corpus.py プロジェクト: SpringRi/phd

def ExportIndex(index_path: pathlib.Path, export_path: pathlib.Path) -> None:
  """Export the text of every content file in an index to a directory.

  Walks index_path recursively, parses each '.pbtxt' file as a ContentFile
  message, and writes its text to '<export_path>/<hex sha256>.txt'. Output
  files that already exist are left untouched. Index entries that fail to
  decode are logged and skipped.

  Args:
    index_path: Root of the index directory to walk.
    export_path: Directory that receives the exported '.txt' files.
  """
  for subdir, _, filenames in os.walk(index_path):
    for filename in filenames:
      if not filename.endswith('.pbtxt'):
        continue
      proto_path = pathlib.Path(os.path.join(subdir, filename))
      # Use a fresh message per file so that no field parsed from a previous
      # entry can carry over if the parser merges into the given message.
      contentfile = scrape_repos_pb2.ContentFile()
      try:
        pbutil.FromFile(proto_path, contentfile)
      except pbutil.DecodeError:
        # Best-effort export: report the corrupt entry and carry on.
        logging.warning('Failed to decode %s', proto_path)
        continue
      sha256 = binascii.hexlify(contentfile.sha256).decode('utf-8')
      out_path = export_path / (sha256 + '.txt')
      if not out_path.is_file():
        # Write UTF-8 explicitly; the platform default encoding may be unable
        # to represent arbitrary source text.
        with open(out_path, 'w', encoding='utf-8') as f:
          f.write(contentfile.text)
        logging.debug(out_path)