Example #1
0
 def __init__(self,
              db_url: str,
              grobid_client: Optional[GrobidClient],
              parse_refs: bool = True,
              **kwargs):
     """Open the database connection and select the GROBID client.

     Args:
         db_url: Passed straight to ``SandcrawlerPostgresClient``.
         grobid_client: Client to use; when falsy (e.g. ``None``) a
             ``GrobidClient()`` with default arguments is created instead.
         parse_refs: Stored on the instance unchanged; it is consumed
             outside of this method.
         **kwargs: Forwarded untouched to the parent initializer.
     """
     super().__init__(**kwargs)

     # One client object backs both the ``db`` handle and its cursor.
     postgres = SandcrawlerPostgresClient(db_url)
     self.db = postgres
     self.cur = postgres.conn.cursor()

     # The fallback client is only constructed when none was supplied.
     self.grobid_client = grobid_client or GrobidClient()
     self.parse_refs = parse_refs
Example #2
0
 def __init__(self, db_url: str, **kwargs):
     """Set up the S3 client and, unless running S3-only, the database.

     Note that the parent initializer is called without arguments;
     ``kwargs`` is only read here and is not forwarded.

     Args:
         db_url: Passed to ``SandcrawlerPostgresClient`` when a database
             connection is needed (i.e. ``s3_only`` is falsy).
         **kwargs: Recognized keys:
             ``s3_url`` (optional, defaults to ``"localhost:9000"``),
             ``s3_access_key``, ``s3_secret_key``, ``s3_bucket``
             (all required),
             ``s3_only`` and ``db_only`` (optional, default ``False``).

     Raises:
         KeyError: If a required ``s3_*`` key is missing from ``kwargs``.
         AssertionError: If both ``s3_only`` and ``db_only`` are truthy.
     """
     super().__init__()
     self.s3 = SandcrawlerMinioClient(
         host_url=kwargs.get("s3_url", "localhost:9000"),
         access_key=kwargs["s3_access_key"],
         secret_key=kwargs["s3_secret_key"],
         default_bucket=kwargs["s3_bucket"],
     )
     self.s3_only = kwargs.get("s3_only", False)
     self.db_only = kwargs.get("db_only", False)
     # Raised explicitly instead of using an `assert` statement: asserts are
     # removed under `python -O`, which would let this invalid combination
     # through silently. The exception type and message are kept as before
     # so existing callers see no difference.
     if self.s3_only and self.db_only:
         raise AssertionError("Only one of s3_only and db_only allowed")
     if not self.s3_only:
         self.db: Optional[
             SandcrawlerPostgresClient] = SandcrawlerPostgresClient(db_url)
         self.cur: Optional[
             psycopg2.extensions.cursor] = self.db.conn.cursor()
     else:
         # S3-only mode: no database connection is opened at all.
         self.db = None
         self.cur = None
Example #3
0
 def __init__(self, db_url: str, **kwargs):
     """Connect to the database and keep a cursor on the instance.

     Args:
         db_url: Passed straight to ``SandcrawlerPostgresClient``.
         **kwargs: Forwarded untouched to the parent initializer.
     """
     super().__init__(**kwargs)
     # Build the client once, then expose both it and a cursor from it.
     postgres = SandcrawlerPostgresClient(db_url)
     self.db = postgres
     self.cur = postgres.conn.cursor()