def __init__(self):
    """Load coin reference data and create the empty tracking containers.

    Reference data is read from ``args.coins_earliest_trade_dates`` through
    the two ``utils`` readers; every other attribute starts out empty.
    """

    def new_counter_map():
        # Inner level of a two-level map: missing keys start at 0.
        return collections.defaultdict(int)

    def new_list_map():
        # Inner level of a two-level map: missing keys start as [].
        return collections.defaultdict(list)

    reference_path = args.coins_earliest_trade_dates

    # Coin -> earliest trade date. The reader also takes the reverse map
    # (date -> set of coins); that one is a throwaway and is not stored.
    self.earliest_trade_date_by_coin = {}
    discarded_coins_by_date = collections.defaultdict(set)
    utils.read_earliest_trade_dates(
        reference_path,
        self.earliest_trade_date_by_coin,
        discarded_coins_by_date)

    # Coin symbol -> coin name, used when producing outputs. The second
    # container handed to the reader is local only and dropped afterwards.
    self.name_by_coin = {}
    discarded_coin_by_name = {}
    utils.read_coin_name_symbols(
        reference_path, self.name_by_coin, discarded_coin_by_name)

    # --- Per-user activity -------------------------------------------------
    # user -> date of the user's first post
    self.first_post_per_user = {}
    # user -> {date: number of posts made up to that date}
    self.num_posts_per_user = collections.defaultdict(new_counter_map)
    # user -> {date: number of subjects the user initiated up to that date}
    self.num_subjects_per_user = collections.defaultdict(new_counter_map)

    # --- First mention of each coin ----------------------------------------
    self.first_mention_date_by_coin = {}
    # coin -> {user: count}. Only one user is tracked per coin and the count
    # is always one; the nested shape exists so the utils helpers that expect
    # a map of maps can be reused.
    self.first_mention = collections.defaultdict(new_counter_map)
    # coin -> URL and date of that first mention (two-level map of lists).
    self.first_mention_date_url = collections.defaultdict(new_list_map)
    def __init__(self):
        """Load coin reference data and create the empty tracking containers.

        Reference data is read from ``args.coins_earliest_trade_dates``
        through the two ``utils`` readers; every other attribute starts out
        empty.
        """

        def new_counter_map():
            # Inner level of a two-level map: missing keys start at 0.
            return collections.defaultdict(int)

        def new_list_map():
            # Inner level of a two-level map: missing keys start as [].
            return collections.defaultdict(list)

        reference_path = args.coins_earliest_trade_dates

        # Coin -> earliest trade date. The reader also takes the reverse map
        # (date -> set of coins); that one is a throwaway and is not stored.
        self.earliest_trade_date_by_coin = {}
        discarded_coins_by_date = collections.defaultdict(set)
        utils.read_earliest_trade_dates(
            reference_path,
            self.earliest_trade_date_by_coin,
            discarded_coins_by_date)

        # Coin symbol -> coin name, used when producing outputs. The second
        # container handed to the reader is local only and dropped afterwards.
        self.name_by_coin = {}
        discarded_coin_by_name = {}
        utils.read_coin_name_symbols(
            reference_path, self.name_by_coin, discarded_coin_by_name)

        # --- Per-user activity ---------------------------------------------
        # user -> date of the user's first post
        self.first_post_per_user = {}
        # user -> {date: number of posts made up to that date}
        self.num_posts_per_user = collections.defaultdict(new_counter_map)
        # user -> {date: number of subjects the user initiated up to that date}
        self.num_subjects_per_user = collections.defaultdict(new_counter_map)

        # --- First mention of each coin ------------------------------------
        self.first_mention_date_by_coin = {}
        # coin -> {user: count}. Only one user is tracked per coin and the
        # count is always one; the nested shape exists so the utils helpers
        # that expect a map of maps can be reused.
        self.first_mention = collections.defaultdict(new_counter_map)
        # coin -> URL and date of that first mention (two-level map of lists).
        self.first_mention_date_url = collections.defaultdict(new_list_map)
  def __init__(self):
    """Load unmodified-coin reference data and create the empty trackers.

    Reference data is read from ``args.unmodified_coins_earliest_trade_dates``
    through the two ``utils`` readers; every other attribute starts out empty.
    """

    def new_counter_map():
      # Inner level of a two-level map: missing keys start at 0.
      return collections.defaultdict(int)

    def new_list_map():
      # Inner level of a two-level map: missing keys start as [].
      return collections.defaultdict(list)

    reference_path = args.unmodified_coins_earliest_trade_dates

    # Coin -> earliest trade date. The reader also takes the reverse map
    # (date -> set of coins); that one is a throwaway and is not stored.
    self.earliest_trade_date_by_unmodified_coin = {}
    discarded_coins_by_date = collections.defaultdict(set)
    utils.read_earliest_trade_dates(
        reference_path,
        self.earliest_trade_date_by_unmodified_coin,
        discarded_coins_by_date)

    # Coin symbol -> coin name, used when producing outputs. The second
    # container handed to the reader is local only and dropped afterwards.
    self.name_by_unmodified_coin = {}
    discarded_coin_by_name = {}
    utils.read_coin_name_symbols(
        reference_path, self.name_by_unmodified_coin, discarded_coin_by_name)

    # Coin -> date of its earliest mention anywhere in the forum, and
    # coin -> date of its earliest mention in a thread-opening post.
    self.first_mention_date_by_unmodified_coin = {}
    self.first_thread_post_mention_date_by_unmodified_coin = {}

    # --- 1st list: first mention in any post, any date ----------------------
    # Users who were first to mention the unmodified coin name AND symbol.
    # coin -> {user: count}; only one user is tracked per coin and the count
    # is always one, so a flat dict would also have worked. This can be full
    # of false positives (mention counts used to be considered for that
    # reason), but it may still be useful when a mention requires both the
    # name and the symbol to be present in the text.
    self.unmodified_first_mention = collections.defaultdict(new_counter_map)
    # coin -> URL and date of that first mention (two-level map of lists).
    self.unmodified_first_mention_date_url = collections.defaultdict(
        new_list_map)

    # --- 2nd list: first mention in a thread-opening post, any date ---------
    # Users who were first to mention the unmodified coin name in a post
    # that starts a new thread.
    self.unmodified_first_thread_post_mention = collections.defaultdict(
        new_counter_map)
    # URL and date of each coin's first mention in a thread-opening post.
    self.unmodified_first_thread_post_mention_date_url = (
        collections.defaultdict(new_list_map))

    # --- Per-user activity --------------------------------------------------
    # user -> date of the user's first post
    self.first_post_per_user = {}
    # user -> {date: number of posts made up to that date}
    self.num_posts_per_user = collections.defaultdict(new_counter_map)
    # user -> {date: number of subjects the user initiated up to that date}
    self.num_subjects_per_user = collections.defaultdict(new_counter_map)
    def __init__(self):
        """Load unmodified-coin reference data and create the empty trackers.

        Reference data is read from
        ``args.unmodified_coins_earliest_trade_dates`` through the two
        ``utils`` readers; every other attribute starts out empty.
        """

        def new_counter_map():
            # Inner level of a two-level map: missing keys start at 0.
            return collections.defaultdict(int)

        def new_list_map():
            # Inner level of a two-level map: missing keys start as [].
            return collections.defaultdict(list)

        reference_path = args.unmodified_coins_earliest_trade_dates

        # Coin -> earliest trade date. The reader also takes the reverse map
        # (date -> set of coins); that one is a throwaway and is not stored.
        self.earliest_trade_date_by_unmodified_coin = {}
        discarded_coins_by_date = collections.defaultdict(set)
        utils.read_earliest_trade_dates(
            reference_path,
            self.earliest_trade_date_by_unmodified_coin,
            discarded_coins_by_date)

        # Coin symbol -> coin name, used when producing outputs. The second
        # container handed to the reader is local only and dropped afterwards.
        self.name_by_unmodified_coin = {}
        discarded_coin_by_name = {}
        utils.read_coin_name_symbols(
            reference_path,
            self.name_by_unmodified_coin,
            discarded_coin_by_name)

        # Coin -> date of its earliest mention anywhere in the forum, and
        # coin -> date of its earliest mention in a thread-opening post.
        self.first_mention_date_by_unmodified_coin = {}
        self.first_thread_post_mention_date_by_unmodified_coin = {}

        # --- 1st list: first mention in any post, any date ------------------
        # Users who were first to mention the unmodified coin name AND symbol.
        # coin -> {user: count}; only one user is tracked per coin and the
        # count is always one, so a flat dict would also have worked. This can
        # be full of false positives (mention counts used to be considered for
        # that reason), but it may still be useful when a mention requires
        # both the name and the symbol to be present in the text.
        self.unmodified_first_mention = collections.defaultdict(
            new_counter_map)
        # coin -> URL and date of that first mention (two-level map of lists).
        self.unmodified_first_mention_date_url = collections.defaultdict(
            new_list_map)

        # --- 2nd list: first mention in a thread-opening post, any date -----
        # Users who were first to mention the unmodified coin name in a post
        # that starts a new thread.
        self.unmodified_first_thread_post_mention = collections.defaultdict(
            new_counter_map)
        # URL and date of each coin's first mention in a thread-opening post.
        self.unmodified_first_thread_post_mention_date_url = (
            collections.defaultdict(new_list_map))

        # --- Per-user activity ----------------------------------------------
        # user -> date of the user's first post
        self.first_post_per_user = {}
        # user -> {date: number of posts made up to that date}
        self.num_posts_per_user = collections.defaultdict(new_counter_map)
        # user -> {date: number of subjects the user initiated up to that date}
        self.num_subjects_per_user = collections.defaultdict(new_counter_map)
Example #5
0
    def __init__(self):
        """Load modified and unmodified coin data and create empty trackers.

        Reference data is read from ``args.modified_coins_earliest_trade_dates``
        and ``args.unmodified_coins_earliest_trade_dates`` through the two
        ``utils`` readers; every other attribute starts out empty.
        """

        def new_counter_map():
            # Inner level of a two-level map: missing keys start at 0.
            return collections.defaultdict(int)

        def new_list_map():
            # Inner level of a two-level map: missing keys start as [].
            return collections.defaultdict(list)

        modified_path = args.modified_coins_earliest_trade_dates
        unmodified_path = args.unmodified_coins_earliest_trade_dates

        # Coin -> earliest trade date, once per coin flavour. The reader also
        # takes the reverse map (date -> set of coins); those are throwaways
        # and are not stored on the instance.
        self.earliest_trade_date_by_modified_coin = {}
        discarded_modified_coins_by_date = collections.defaultdict(set)
        utils.read_earliest_trade_dates(
            modified_path,
            self.earliest_trade_date_by_modified_coin,
            discarded_modified_coins_by_date)
        self.earliest_trade_date_by_unmodified_coin = {}
        discarded_unmodified_coins_by_date = collections.defaultdict(set)
        utils.read_earliest_trade_dates(
            unmodified_path,
            self.earliest_trade_date_by_unmodified_coin,
            discarded_unmodified_coins_by_date)

        # Coin symbol -> coin name, used when producing outputs. The second
        # container handed to each reader call is local only and dropped.
        self.name_by_modified_coin = {}
        discarded_coin_by_modified_name = {}
        utils.read_coin_name_symbols(
            modified_path,
            self.name_by_modified_coin,
            discarded_coin_by_modified_name)
        self.name_by_unmodified_coin = {}
        discarded_coin_by_unmodified_name = {}
        utils.read_coin_name_symbols(
            unmodified_path,
            self.name_by_unmodified_coin,
            discarded_coin_by_unmodified_name)

        # Coin -> date of its earliest mention anywhere in the forum, and
        # coin -> date of its earliest mention in a thread-opening post.
        self.first_mention_date_by_modified_coin = {}
        self.first_thread_post_mention_date_by_modified_coin = {}
        self.first_mention_date_by_unmodified_coin = {}
        self.first_thread_post_mention_date_by_unmodified_coin = {}

        # --- 1st list: most mentions of the modified name OR symbol ---------
        # coin -> {user: number of mentions in the user's posts}. Counts are
        # tracked only up to the coin's earliest trade date.
        self.modified_mentions_per_user = collections.defaultdict(
            new_counter_map)
        # coin -> {user: first k URLs where the user mentioned the coin}.
        self.modified_urls_per_user = collections.defaultdict(new_list_map)

        # --- 2nd list: first mention of the unmodified name AND symbol ------
        # First mention in any post, any date. coin -> {user: count}; only
        # one user is tracked per coin and the count is always one. A flat
        # dict would do, but the shape mirrors the coin-mention maps for
        # consistency. This can be full of false positives, which is why the
        # number of mentions is looked at as well; it may still be useful when
        # a mention requires both name and symbol to be present in the text.
        self.unmodified_first_mention = collections.defaultdict(
            new_counter_map)
        # coin -> URL and date of that first mention (two-level map of lists).
        self.unmodified_first_mention_date_url = collections.defaultdict(
            new_list_map)

        # --- 3rd list: most mentions of the unmodified name AND symbol ------
        # Counted in any post before the coin's earliest date.
        self.unmodified_mentions_per_user = collections.defaultdict(
            new_counter_map)
        self.unmodified_urls_per_user = collections.defaultdict(new_list_map)

        # --- 4th list: first mention in a thread-opening post ---------------
        # Users who were first to mention both the unmodified coin name AND
        # symbol in a post that starts a new thread, any date. Kept in the
        # same nested shape as the coin-mention maps for consistency.
        self.unmodified_first_thread_post_mention = collections.defaultdict(
            new_counter_map)
        # URL and date of each coin's first mention in a thread-opening post.
        self.unmodified_first_thread_post_mention_date_url = (
            collections.defaultdict(new_list_map))

        # --- Per-user activity ----------------------------------------------
        # user -> date of the user's first post
        self.first_post_per_user = {}
        # user -> {date: number of posts made up to that date}
        self.num_posts_per_user = collections.defaultdict(new_counter_map)
        # user -> {date: number of subjects the user initiated up to that date}
        self.num_subjects_per_user = collections.defaultdict(new_counter_map)