import json
import os
import uuid

# ApiConnector and MongoConnector are project-local helpers; their imports
# are not shown in this excerpt.


class ConnectionEstablisher(object):
    # Environment variable names needed for the API.AI and MongoDB clients
    MONGODB_ACCESS = "DB_ACCESS"
    APICLIENT_ACCESS_TOKEN = "CLIENT_ACCESS_TOKEN"
    LANGUAGE = "en"
    DB_NAME = 'heroku_swknz2mg'
    TABLE_NAME = 'userinfo'

    def __init__(self):
        self.session = self.create_sessionid()
        self.apicat = self.get_envariable(
            ConnectionEstablisher.APICLIENT_ACCESS_TOKEN)
        self.mongoaccess = self.get_envariable(
            ConnectionEstablisher.MONGODB_ACCESS)
        self.apisecure = ApiConnector(self.session, self.LANGUAGE, self.apicat)
        self.mongoclient = MongoConnector(self.mongoaccess)

    # Creates a unique session id for the connection with the API.AI server;
    # API.AI limits session ids to 36 characters.
    def create_sessionid(self):
        return str(uuid.uuid4())[:36]

    # Gets the named environment variable from the local OS
    def get_envariable(self, vname):
        return os.getenv(vname)

    # Sends the text to API.AI, reads the HTTP response object back,
    # decodes it as JSON, and returns the parsed result.
    def api_connect(self, text):
        self.apisecure = ApiConnector(
            session_id=self.session,
            lang=self.LANGUAGE,
            cat=self.apicat)  # cat - Client Access Token
        response = self.apisecure.send_textquery(text).read()
        response_json = json.loads(response.decode('utf-8'))
        return response_json

    # DB helpers (record lookup, insert, update) that delegate to the
    # MongoConnector's functions
    def dbrecord_exists(self, **fields):
        return self.mongoclient.record_exists(
            DB=ConnectionEstablisher.DB_NAME,
            TABLE=ConnectionEstablisher.TABLE_NAME,
            **fields)

    def dbrecord_insert(self, **fields):
        self.mongoclient.insert(ConnectionEstablisher.DB_NAME,
                                ConnectionEstablisher.TABLE_NAME, **fields)

    def dbrecord_update(self, user_id, **fields):
        self.mongoclient.update(DB=ConnectionEstablisher.DB_NAME,
                                TABLE=ConnectionEstablisher.TABLE_NAME,
                                user_id=user_id, **fields)
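# --- Usage sketch (not part of the source) ---
# A minimal example of how ConnectionEstablisher might be driven, assuming
# the CLIENT_ACCESS_TOKEN and DB_ACCESS environment variables are set and
# ApiConnector/MongoConnector are importable. The query text and the field
# names below are hypothetical.
ce = ConnectionEstablisher()

# Send a text query to API.AI and inspect the parsed JSON reply.
reply = ce.api_connect("What is the weather today?")
print(reply.get("result", {}).get("fulfillment"))

# Insert a user record if it is new, otherwise update it.
if not ce.dbrecord_exists(user_id="12345"):
    ce.dbrecord_insert(user_id="12345", name="Alice")
else:
    ce.dbrecord_update("12345", name="Alice")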
def three_line_break_analysis_sample():
    """ three line break technical analysis sample """
    # Three Line Break parameters
    # Currency code to analyse
    currency = 'USD'
    # line-break control count (number of previous lines checked for a reversal)
    line_break_control_count = 3
    # start and end dates of the analysis period
    start_day = datetime.datetime(2015, 6, 1)
    end_day = datetime.datetime(2016, 1, 23)

    # Reading the rates to be analysed with the given parameters
    mc = MongoConnector()
    rates = mc.mongo_get_rates(currency, start_day, end_day,
                               line_break_control_count)

    # creating a TLB instance and analysing the data
    tlb = ThreeLineBreakAnalysis(rates, line_break_control_count, start_day)
    result = tlb.analyse()

    # creating the chart of the result
    fig = seaborn.plt.figure(figsize=(14, 5))
    locs, labels = seaborn.plt.xticks()
    fig.axes[0].set_title('Three Line Break (' +
                          str(line_break_control_count) + ') EUR/' +
                          currency + ' ' + start_day.strftime('%d/%m/%Y') +
                          ' - ' + end_day.strftime('%d/%m/%Y'))
    fig.axes[0].set(xticklabels=[])
    a = 0
    for rs in result:
        # invisible line that stretches the axes to fit each box
        fig.axes[0].plot([a, a], [rs.min_price, rs.max_price], linewidth=0)
        if rs.color == 'G':  # increases are green
            fig.axes[0].add_patch(Rectangle((a, rs.min_price), 1,
                                            rs.max_price - rs.min_price,
                                            fill=False, edgecolor='green',
                                            lw=1))
        else:  # decreases are red
            fig.axes[0].add_patch(Rectangle((a, rs.min_price), 1,
                                            rs.max_price - rs.min_price,
                                            fill=False, edgecolor='red',
                                            lw=1))
        a += 1

    # saving the chart as a PNG image file
    fig.savefig('TLB_Result.png', dpi=400, bbox_inches='tight')
    print("Created chart PNG file.")
def get_live_rates():
    """
    Gets the live rates that the European Central Bank serves.
    Stores the rates in MongoDB.
    """
    file_name = 'eurofxref-hist.xml'
    file_url = 'https://www.ecb.europa.eu/stats/eurofxref/eurofxref-hist.xml'
    # downloading the XML file
    download_file(file_url, file_name)
    # parsing the XML file to get the rates
    rate_data = read_file(file_name)
    mc = MongoConnector()
    # removing the previous rates
    mc.mongo_delete_rates()
    # inserting the new rates into the db
    mc.mongo_insert_rates(rate_data)
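# --- Helper sketch (not part of the source) ---
# download_file and read_file are called above but not defined in this
# section. A minimal standard-library sketch of download_file, with its
# signature inferred from the call site; the real helper may stream,
# retry, or validate the response.
import urllib.request


def download_file(file_url, file_name):
    # fetch the URL and write the raw bytes to a local file
    with urllib.request.urlopen(file_url) as response:
        with open(file_name, 'wb') as out_file:
            out_file.write(response.read())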
def rsi_analysis_sample():
    """ RSI technical analysis sample """
    # RSI parameters
    # Currency code to analyse
    currency = 'USD'
    # number of days for the RSI calculation
    number_of_days = 14
    # start and end dates of the analysis period
    start_day = datetime.datetime(2015, 1, 1)
    end_day = datetime.datetime(2016, 1, 23)
    # overbought and oversold thresholds
    over_bought = 70
    over_sold = 30

    # Reading the rates to be analysed with the given parameters
    mc = MongoConnector()
    rates = mc.mongo_get_rates(currency, start_day, end_day, number_of_days)

    # creating an RSI instance and analysing the data
    rsi = RSIAnalysis(number_of_days, rates, start_day)
    result = rsi.analyse()

    # creating a chart with two subplots:
    # one for the RSI values, the other for the real end-of-day prices
    fig, axes = seaborn.plt.subplots(2, 1)
    fig.set_size_inches(14, 5)
    locs, labels = seaborn.plt.xticks()
    seaborn.plt.setp(labels, rotation=45)
    axes[0].plot([x.day for x in result], [x.value for x in result])
    axes[0].set_title('RSI(' + str(number_of_days) + ') Overbought: ' +
                      str(over_bought) + ' Oversold: ' + str(over_sold))
    axes[0].set(xticklabels=[])
    axes[0].set(ylim=(0, 100))
    # horizontal reference lines at the overbought/oversold levels
    axes[0].plot([x.day for x in result], [over_bought] * len(result))
    axes[0].plot([x.day for x in result], [over_sold] * len(result))
    axes[1].plot([x.day for x in result], [x.price for x in result])
    axes[1].set_title('Real Rates (EUR/' + currency + ')')

    # saving the chart as a PNG image file
    fig.savefig('RSI_Result.png', dpi=400, bbox_inches='tight')
    print("Created chart PNG file.")
def bollinger_bands_analysis_sample():
    """ Bollinger Bands technical analysis sample """
    # Bollinger Bands parameters
    # Currency code to analyse
    currency = 'USD'
    # number of days for the calculation
    number_of_days = 20
    # start and end dates of the analysis period
    start_day = datetime.datetime(2015, 1, 1)
    end_day = datetime.datetime(2016, 1, 23)
    # number of standard deviations for the bands
    count_of_std = 2

    # Reading the rates to be analysed with the given parameters
    mc = MongoConnector()
    rates = mc.mongo_get_rates(currency, start_day, end_day, number_of_days)

    # creating a Bollinger Bands instance and analysing the data
    bb = BollingerBandsAnalysis(number_of_days, count_of_std, rates, start_day)
    result = bb.analyse()

    # creating the chart
    fig = seaborn.plt.figure(figsize=(18, 5))
    locs, labels = seaborn.plt.xticks()
    fig.axes[0].plot([x.day for x in result],
                     [x.real_price for x in result],
                     label='Real Prices', color='black', alpha=1, lw=1)
    fig.axes[0].plot([x.day for x in result],
                     [x.upper_band for x in result],
                     label='Upper Band', color='orange', alpha=0.5, lw=2)
    fig.axes[0].plot([x.day for x in result],
                     [x.middle_band for x in result],
                     label='Middle Band', color='y', alpha=0.5, lw=2)
    fig.axes[0].plot([x.day for x in result],
                     [x.lower_band for x in result],
                     label='Lower Band', color='red', alpha=0.5, lw=2)
    fig.axes[0].set_title('Bollinger Bands(' + str(number_of_days) + ', ' +
                          str(count_of_std) + ') EUR/' + currency + ' ' +
                          start_day.strftime('%d/%m/%Y') + ' - ' +
                          end_day.strftime('%d/%m/%Y'))
    seaborn.plt.legend(loc='upper right')
    seaborn.plt.setp(labels, rotation=45)

    # saving the chart as a PNG image file
    fig.savefig('BollingerBands_Result.png', dpi=400, bbox_inches='tight')
    print("Created chart PNG file.")
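# --- Driver sketch (not part of the source) ---
# The three sample functions above share one shape: fetch rates from
# MongoDB, run a single analysis class, render a chart. A hypothetical
# driver with the imports the samples rely on; note the samples use
# seaborn.plt, an alias for matplotlib.pyplot that only older seaborn
# versions expose, and the analysis-class imports are omitted because
# their module paths are not shown in this section.
import datetime

import seaborn
from matplotlib.patches import Rectangle

if __name__ == '__main__':
    get_live_rates()                    # refresh the stored ECB rates
    rsi_analysis_sample()               # RSI(14) chart
    bollinger_bands_analysis_sample()   # Bollinger Bands(20, 2) chart
    three_line_break_analysis_sample()  # TLB(3) chart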
def test_connection(self):
    from MongoConnector import MongoConnector
    with MongoConnector(log_file="test.log") as mc:
        articles = mc.client[mc.secrets['MONGODB_NEWS_DB']].articles
        print(
            articles.find_one({
                "title": "Rags-to-riches story of Nathan's Famous Hot Dogs"
            }))
    os.remove("test.log")
class Get_Retweets(object):
    def __init__(self, mongo_config=MONGO_CONFIG):
        logging.debug("Establishing MongoDB connection with parameters: "
                      "{}".format(mongo_config))
        # despite the name, __connect__ returns the collection handle
        # (it supports .find() below)
        self.cursor = MongoConnector(mongo_config).__connect__()
        logging.info("Established connection...\n")

    def get_retweets(self):
        '''Finds the retweets among the collected tweets and stores them
        in JSON format, one tweet per line.
        '''
        tweets_read = 0
        logging.debug("Checking for retweets...")
        with open(OUTPUT_DIRECTORY + F_NAME, 'w') as f_out:
            total_tweets = self.cursor.find().count()
            logging.debug("Total number of tweets (historical tweets): {0}"
                          .format(total_tweets))
            for tweet in self.cursor.find():
                if tweet.get('retweeted_status'):
                    json.dump(tweet, f_out, default=json_util.default)
                    f_out.write('\n')
                    tweets_read += 1
        logging.debug("Total number of retweets (historical tweets): {0}"
                      .format(tweets_read))
        logging.debug("Total number of non-retweets (historical tweets): {0}"
                      .format(total_tweets - tweets_read))
        logging.debug("Successfully checked for retweets")
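# --- Usage sketch (not part of the source) ---
# Get_Retweets relies on the module-level constants MONGO_CONFIG,
# OUTPUT_DIRECTORY, and F_NAME; the values below are placeholders
# (MONGO_CONFIG mirrors the config1 dict used elsewhere in this section).
MONGO_CONFIG = {
    'MONGO_COLL': 'social_coll',
    'MONGO_DB': 'tweetCorpus',
    'MONGO_HOST': 'localhost',
    'MONGO_PORT': 27017
}
OUTPUT_DIRECTORY = './output/'
F_NAME = 'retweets.json'

# Connect once, then dump every retweet to OUTPUT_DIRECTORY + F_NAME.
Get_Retweets().get_retweets()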
# This snippet opens mid-file; known third-party imports are restored
# here, while the custom PyContract and MongoConnector imports are not
# shown in the source.
from string import punctuation

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import TweetTokenizer

# (the opening of this config dict is truncated in the source)
config = {
    'MONGO_DB': 'tweetCorpus',
    'MONGO_HOST': 'localhost',
    'MONGO_PORT': 27017
}

customWords = [
    'bc', 'http', 'https', 'co', 'com', 'rt', 'one', 'us', 'new', 'lol',
    'may', 'get', 'want', 'like', 'love', 'no', 'thank', 'would', 'thanks',
    'good', 'much', 'low', 'roger', 'im'
]
alphabets = list(map(chr, range(97, 123)))  # 'a' through 'z'
myStopWords = set(
    stopwords.words('english') + list(punctuation) + customWords + alphabets)

# Initialize db connector, contracter, tokenizer, and lemmatizer
dbconnector = MongoConnector(config)
contracter = PyContract()
tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
lemmatizer = WordNetLemmatizer()

log_file = OUTPUT_DIRECTORY + "/pre_processing_log.log"
# Start logging
logging.basicConfig(filename=log_file, level=logging.DEBUG,
                    format='%(asctime)s %(message)s')


def get_Tweets(user: str) -> dict:
    '''
    This function takes the config file and connects to the MongoDB
    collection. Retrieves the tweet list for the user id and returns a
    dict object.
    '''
import time
import logging
import sys
import json
import codecs
import timeit
import os

# import custom libraries
from MongoConnector import MongoConnector

config1 = {
    'MONGO_COLL': 'social_coll',
    'MONGO_DB': 'tweetCorpus',
    'MONGO_HOST': 'localhost',
    'MONGO_PORT': 27017
}

cursor2 = MongoConnector(config1).__connect__()
coll = config1['MONGO_COLL']

########## TWITTER API ACCESS KEYS AND TOKENS #############
ACCESS_TOKEN_fol = "****"
ACCESS_TOKEN_SECRET_fol = "****"
CONSUMER_KEY_fol = "****"
CONSUMER_SECRET_fol = "****"

ACCESS_TOKEN_fr = "****"
ACCESS_TOKEN_SECRET_fr = "****"
CONSUMER_KEY_fr = "****"
CONSUMER_SECRET_fr = "****"
#############################################################
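# --- Client sketch (not part of the source) ---
# The masked keys above are presumably fed into a Twitter client; a
# minimal tweepy (v3.x) wiring of the "_fol" credential set. That tweepy
# is the client in use is an assumption, since this section does not show
# the client setup.
import tweepy

auth = tweepy.OAuthHandler(CONSUMER_KEY_fol, CONSUMER_SECRET_fol)
auth.set_access_token(ACCESS_TOKEN_fol, ACCESS_TOKEN_SECRET_fol)
api = tweepy.API(auth, wait_on_rate_limit=True)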
#### DB RELATED STUFF ###
RESUME = False
if options.resume:
    optionsrec["RESUME"] = True

databaserec = None
if options.mongodb is not None:
    optionsrec["DBDRIVE"] = True
    mongoData = json.loads(open(options.mongodb).read())
    client = MongoClient(mongoData["host"], mongoData["port"])
    db = client[mongoData["db"]]
    databaserec = MongoConnector({
        "classes": db[mongoData["classes"]],
        "classes_old": db[mongoData["classes_old"]],
        "instances": db[mongoData["instances"]],
        "instances_old": db[mongoData["instances_old"]]
    })
    logging.info("Read DB configuration from " + options.mongodb)

mandatory = []
if options.mandatoryfile is not None:
    try:
        mandatory = json.loads(open(options.mandatoryfile).read())
    except Exception:
        logging.warning(
            "Failed to read file with mandatory field information.")
def test___init__(self):
    from MongoConnector import MongoConnector
    mc = MongoConnector(log_file="test.log")
    os.remove("test.log")
def test___enter__(self):
    from MongoConnector import MongoConnector
    with MongoConnector(log_file="test.log") as mc:
        pass
    os.remove("test.log")
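# --- Inferred interface sketch (not part of the source) ---
# The tests above exercise construction, context entry/exit, and a query
# through mc.client and mc.secrets. A sketch of the MongoConnector surface
# they imply; everything beyond that tested surface (the placeholder
# secrets, host, and port) is an assumption, not the project's actual
# implementation.
import logging

from pymongo import MongoClient


class MongoConnector:
    def __init__(self, log_file="mongo.log"):
        logging.basicConfig(filename=log_file, level=logging.INFO)
        # placeholder secrets; the real class presumably loads these
        self.secrets = {"MONGODB_NEWS_DB": "news"}
        self.news = self.secrets["MONGODB_NEWS_DB"]
        self.client = MongoClient("localhost", 27017)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.client.close()
        return False  # do not suppress exceptions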
def __init__(self,
             num_distinct_documents=5000,
             replace_entities=True,
             max_term_length=127,
             remove_stopwords=True,
             custom_stopwords=[
                 ',', '.', '-', '\xa0', '“', '”', '"', '\n', '—', ':',
                 '?', 'I', '(', ')'
             ],
             analyze=False,
             document_table_name="documents",
             sentence_table_name="sentences",
             sentence_fields=OrderedDict({
                 "doc_id": "document_id",
                 "sen_id": "sentence_id",
                 "content": "sentence_text"
             }),
             term_table_name="terms",
             term_sql_format=("term_id", "term_text", "is_entity"),
             term_occurrence_table_name="term_occurrence",
             term_occurrence_sql_format=("document_id", "sentence_id",
                                         "term_id"),
             entity_table_name="entities",
             entity_sql_format=("entity_id", "entity_type"),
             database="postgres",
             user="******",
             password="******",
             host="127.0.0.1",
             port=5435,
             log_file=os.path.join(os.path.dirname(__file__),
                                   "logs/TermGenerator.log"),
             log_level=logging.INFO,
             log_verbose=True):
    """
    Initializes various parameters, registers logger and MongoConnector,
    and sets up the document limit.

    :param num_distinct_documents: (int) The number of distinct documents
           retrieved from the queries. For performance reasons, this should
           be limited during debugging/development. 0 (zero) represents no
           limit, in accordance with the MongoDB standard for .limit().
    :param replace_entities: (boolean) Whether or not entities in the text
           should be recognised and replaced. The reason for this is that
           single terms might be merged into one term, i.e. first and last
           name: "Dennis" "Aumiller" would be two separate terms with
           traditional splitting (replace_entities=False), whereas - if set
           to True - "Dennis Aumiller" would represent a single entity.
    :param max_term_length: (int) Maximum length of a term (varchar
           property in the table).
    :param remove_stopwords: (boolean) Determines whether stop words are
           removed. Currently, we are still deciding on the final set, but
           likely either one (or both) of the NLTK and spaCy stop word
           lists.
    :param custom_stopwords: (list of strings) Additional words that will
           not be considered at adding-time.
    :param analyze: (boolean) Whether or not to include analytically
           relevant metrics.
    :param document_table_name: (str) Name of the table where the document
           information is stored.
    :param sentence_table_name: (str) Name of the table where the sentence
           information will be stored.
    :param sentence_fields: (OrderedDict) Mapping of input fields in
           MongoDB to output fields in Postgres for the sentence table.
    :param term_table_name: (str) Name of the Postgres table for the terms.
    :param term_sql_format: (tuple) Since terms are generated locally, a
           tuple of the Postgres columns suffices.
    :param term_occurrence_table_name: (str) Name of the Postgres table
           for the term occurrences.
    :param term_occurrence_sql_format: (tuple) Same as term_sql_format,
           but for the term occurrences.
    :param entity_table_name: (str) (Not implemented yet) Name of the
           table for the entity meta information.
    :param entity_sql_format: (tuple) Same as term_sql_format, but for
           entities.
    :param database: (str) Database name.
    :param user: (str) User name for access to the Postgres database.
    :param password: (str) Corresponding user password.
    :param host: (IP) IP address (in string format) of the Postgres host.
    :param port: (integer) Port at which to access the database.
    :param log_file: (os.path) Path to the file containing the logs.
    :param log_level: (logging.LEVEL) Specifies the level to be logged.
    :param log_verbose: (boolean) Specifies whether or not to log to
           stdout as well.
    """
    # set up logger
    self.logger = set_up_logger(__name__, log_file, log_level, log_verbose)
    self.logger.info("Successfully registered logger to TermGenerator.")

    # register a MongoConnector
    self.mc = MongoConnector()
    self.logger.info(
        "Successfully registered MongoConnector to TermGenerator.")

    # PostgresConnector
    self.pc = PostgresConnector(database, user, password, host, port)
    self.logger.info(
        "Successfully registered PostgresConnector to TermGenerator.")

    self.num_distinct_documents = num_distinct_documents
    # do this early since we need it already for the distinct documents.
    self.document_table_name = document_table_name

    # get the distinct IDs of the documents so we can match against them
    # later. Since we have removed parts of the document collection, we
    # have to make sure to get this from Postgres.
    self.logger.info("Parsing relevant documents from Postgres...")
    with self.pc as open_pc:
        open_pc.cursor.execute("SELECT document_id FROM {}".format(
            self.document_table_name))
        self.first_distinct_documents = list(open_pc.cursor.fetchall())
        # extract from the tuple structure
        self.first_distinct_documents = [
            el[0] for el in self.first_distinct_documents
        ]
        self.logger.info("Retrieved all relevant documents from Postgres.")

    # additionally restrict if we only want a limited number of documents.
    if self.num_distinct_documents != 0:
        self.logger.info(
            "Non-zero limit detected. Limiting to the first N entries.")
        self.first_distinct_documents = \
            self.first_distinct_documents[:self.num_distinct_documents]

    self.replace_entities = replace_entities
    self.analyze = analyze
    self.max_term_length = max_term_length
    self.nlp = spacy.load("en")

    # construct a dictionary with the entries per document/sentence id
    # pair. Thus, we can later check with higher efficiency whether there
    # are any entities in the current sentence.
    self.occurrence_dict = {}
    self.occurring_entities = []

    # start building the term dictionary/set, as well as an occurrence
    # map. Since terms will be "post-processed", the collection is first
    # created as a list and later cast to Counter and set.
    self.terms = []  # cast into a set later on.
    self.term_in_sentence = set()
    self.term_id = {}
    self.term_is_entity = {}
    if self.analyze:
        self.term_count = Counter()
        self.entity_count = Counter()
    self.entities = []
    self.sentences = []
    self.processed_sentences = []

    # Postgres tables
    if not sentence_fields:
        self.logger.error("No sentence fields specified!")
    self.sentence_table_name = sentence_table_name
    self.sentence_fields = sentence_fields

    if not term_sql_format:
        self.logger.error("No term fields specified!")
    self.term_table_name = term_table_name
    self.term_sql_format = ", ".join(term_sql_format)

    if not term_occurrence_sql_format:
        self.logger.error("No term occurrence fields specified!")
    self.term_occurrence_table_name = term_occurrence_table_name
    self.term_occurrence_sql_format = ", ".join(term_occurrence_sql_format)

    if not entity_sql_format:
        self.logger.error("No entity fields specified!")
    self.entity_table_name = entity_table_name
    self.entity_sql_format = ", ".join(entity_sql_format)

    # fields to retrieve from MongoDB:
    self.sentence_values_to_retrieve = {
        key: 1 for key in self.sentence_fields.keys()
    }
    # suppress _id if not explicitly requested:
    if "_id" not in self.sentence_values_to_retrieve.keys():
        self.sentence_values_to_retrieve["_id"] = 0
    self.sentence_sql_format = ", ".join(
        [value for value in self.sentence_fields.values()])

    # create the union of stop word sets, and add any custom stop words
    self.remove_stopwords = remove_stopwords
    self.removed_counter = 0
    self.stopwords = STOP_WORDS.union(set(stopwords.words("english")))
    for word in custom_stopwords:
        self.stopwords.add(word)

    self.logger.info("Successfully initialized TermGenerator.")
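# --- Usage sketch (not part of the source) ---
# Only __init__ is shown above, so this is just a hedged instantiation:
# a small document limit for development, with placeholder credentials.
# Whatever method consumes self.terms etc. afterwards is not shown here.
tg = TermGenerator(num_distinct_documents=100,
                   replace_entities=True,
                   user="postgres",
                   password="postgres")
# Afterwards, tg.first_distinct_documents holds the limited document IDs
# and tg.stopwords the merged spaCy/NLTK/custom stop word set.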
import json
import sys
from pprint import pprint

import pymongo

# import custom libraries
from MongoConnector import MongoConnector

# Load the config dictionary object from the db_config.json file
with open("db_config.json", 'r') as f:
    config = json.load(f)

# print the config file
pprint(config)

# Create a cursor to connect to MongoDB
cursor = MongoConnector(config).__connect__()

# Get a document from the MongoDB collection
# documents = cursor.find({}).limit(1)
# pprint(list(documents))

# Load the unique users from the file into unique_users_list
unique_users_list = []
with open("./output/unique_users.txt", 'r', encoding='utf-8') as outfile:
    unique_users_list = outfile.read().splitlines()

# Collect the tweets of the unique users and save them in a dictionary.
# Store the user info (id_str, screen_name), tweet info (tweet_id, tweet,
# tweet_status, truncated), user mention info (mentions, id_str, names,
# screen-names), favorite info (favorited, favorite_count), retweet info
# (retweeted, retweet_count), reply info (reply_id, reply_count), and
# quote info (quote_status, quote_list)
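# --- Collection loop sketch (not part of the source) ---
# The comment above describes the per-user collection step without
# showing it. A minimal version of that loop; the query path and the
# fields kept follow Twitter's standard tweet JSON and are assumptions.
user_tweets = {}
for screen_name in unique_users_list:
    docs = cursor.find({"user.screen_name": screen_name})
    user_tweets[screen_name] = [{
        "id_str": d.get("id_str"),
        "text": d.get("text"),
        "truncated": d.get("truncated"),
        "favorite_count": d.get("favorite_count"),
        "retweet_count": d.get("retweet_count"),
    } for d in docs]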
def __init__(self,
             fields=OrderedDict({
                 "_id": "document_id",
                 "title": "title",
                 "feedName": "feedName",
                 "category": "category",
                 "feedURL": "feedURL",
                 "published": "published"
             }),
             num_distinct_documents=0,
             document_table_name="documents",
             database="postgres",
             user="******",
             password="******",
             host="127.0.0.1",
             port=5435,
             log_file=os.path.join(os.path.dirname(__file__),
                                   "logs/DocumentGenerator.log"),
             log_level=logging.INFO,
             log_verbose=True):
    """
    Initializes context, and sets up the documents that will be parsed.
    Also establishes the PostgresConnector that will later be used to push
    the retrieved documents.

    :param fields: (OrderedDict) Key-value pairs that map each field to
           retrieve (key) to the name it should have in the SQL table
           (value). Ordered because SQL tables are.
    :param num_distinct_documents: (int) As the name indicates, the number
           of distinct articles that should be used, mainly for debugging
           purposes. 0 means all documents will be used, in accordance
           with MongoDB standards.
    :param document_table_name: (str) Name of the Postgres table that
           should contain the documents.
    :param database: (str) Database name.
    :param user: (str) User name for access to the Postgres database.
    :param password: (str) Corresponding user password.
    :param host: (IP) IP address (in string format) of the Postgres host.
    :param port: (integer) Port at which to access the database.
    :param log_file: (os.path) Path to the file containing the logs.
    :param log_level: (logging.LEVEL) Specifies the level to be logged.
    :param log_verbose: (boolean) Specifies whether or not to log to
           stdout as well.
    """
    # set up logger
    self.logger = set_up_logger(__name__, log_file, log_level, log_verbose)
    self.logger.info(
        "Successfully registered logger to DocumentGenerator.")

    # register a MongoConnector
    self.mc = MongoConnector()
    self.logger.info(
        "Successfully registered MongoConnector to DocumentGenerator.")

    self.num_distinct_documents = num_distinct_documents
    # get the distinct IDs of the documents so we can match against them later
    if self.num_distinct_documents != 0:
        self.logger.info(
            "Non-zero limit detected. Fetching first N distinct document "
            "IDs now...")
        with self.mc as open_mc:
            documents = open_mc.client[open_mc.news].articles
            self.first_documents = list(documents.find().limit(
                self.num_distinct_documents))
            # for a small enough number and a large enough document
            # collection, this is more efficient:
            self.first_documents = [
                el["_id"] for el in self.first_documents
            ]
            self.logger.info(
                "Successfully registered relevant document IDs.")
    else:
        # needed to avoid later conflicts
        self.first_documents = []

    # set up the PostgresConnector. Since we only use these once, I don't
    # see any reason to store the connection details locally again.
    self.pc = PostgresConnector(database, user, password, host, port)
    self.logger.info(
        "Successfully registered PostgresConnector to DocumentGenerator.")

    # format the fields into a reasonable format
    self.fields = fields
    if not self.fields:
        self.logger.error("No fields for MongoDB table specified!")
    self.values_to_retrieve = {key: 1 for key in self.fields.keys()}
    # suppress _id if not wanted, as it is returned by default.
    if "_id" not in self.values_to_retrieve.keys():
        self.values_to_retrieve["_id"] = 0  # TODO
    self.sql_format = ", ".join([value for value in self.fields.values()])
    self.document_table_name = document_table_name

    # preparation for later, according to PEP 8
    self.data = []
    self.logger.info("Successfully set up DocumentGenerator.")
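# --- Usage sketch (not part of the source) ---
# A hedged instantiation of DocumentGenerator with a small debug limit;
# the derived attributes computed in __init__ can then be inspected.
# Connection details fall back to the (masked) defaults above.
dg = DocumentGenerator(num_distinct_documents=50)
print(dg.values_to_retrieve)  # e.g. {'_id': 1, 'title': 1, ...}
print(dg.sql_format)          # "document_id, title, feedName, ..."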