Esempio n. 1
0
def main(argv):
    """Write the rolling average hashtag-graph degree for a stream of tweets.

    Reads ``inp_file`` line by line, treating every non-blank line as one
    JSON record.  For each record that can be processed, the hashtag graph
    is updated and one line holding the current average degree is written
    to ``out_file``.

    Args:
        argv: Two-item sequence ``(inp_file, out_file)`` with the input and
            output file paths.

    Raises:
        ValueError: If ``argv`` does not hold exactly two items, or if a
            non-blank line is not valid JSON.
    """
    inp_file, out_file = argv

    graph = {}

    # Both files are managed by the ``with`` statement so the output handle
    # is flushed and closed even when a line fails to parse.
    with open(inp_file) as input_file, open(out_file, 'w') as out_put:
        for line in input_file:
            line = line.rstrip()
            if not line:
                # Blank lines carry no record; json.loads would reject them.
                continue
            line_json = json.loads(line)
            try:
                created_at = dtparser.parse(line_json["created_at"])
                _, cleaned_line = _clean_string(line_json['text'])
                hash_tags = get_hashtag(cleaned_line)

                # update graph if there are more than one hashtags
                if hash_tags and len(hash_tags) > 1:
                    graph = update_or_build_graph(graph, hash_tags, created_at)

                # update graph to remove edges created more than 60 seconds ago
                graph = shuffle_graph(graph, created_at)
                avg_degree = calculate_avg_degree(graph)
                out_put.write(str(avg_degree) + "\n")
            except (KeyError, TypeError, ValueError):
                # Records that are not tweets are skipped without output,
                # e.g. the following sample limit lines:
                # {"limit":{"track":19,"timestamp_ms":"1446218985758"}}
                # Only data-shape errors are swallowed; anything else (for
                # instance a NameError or an I/O failure) now propagates.
                continue
def main(argv):
    """Write the rolling average hashtag-graph degree for a stream of tweets.

    Reads ``inp_file`` line by line, treating every non-blank line as one
    JSON record.  For each record that can be processed, the hashtag graph
    is updated and one line holding the current average degree is written
    to ``out_file``.

    Args:
        argv: Two-item sequence ``(inp_file, out_file)`` with the input and
            output file paths.

    Raises:
        ValueError: If ``argv`` does not hold exactly two items, or if a
            non-blank line is not valid JSON.
    """
    inp_file, out_file = argv

    graph = {}

    # Both files are managed by the ``with`` statement so the output handle
    # is flushed and closed even when a line fails to parse.
    with open(inp_file) as input_file, open(out_file, 'w') as out_put:
        for line in input_file:
            line = line.rstrip()
            if not line:
                # Blank lines carry no record; json.loads would reject them.
                continue
            line_json = json.loads(line)
            try:
                created_at = dtparser.parse(line_json["created_at"])
                _, cleaned_line = _clean_string(line_json['text'])
                hash_tags = get_hashtag(cleaned_line)

                # update graph if there are more than one hashtags
                if hash_tags and len(hash_tags) > 1:
                    graph = update_or_build_graph(graph, hash_tags, created_at)

                # update graph to remove edges created more than 60 seconds ago
                graph = shuffle_graph(graph, created_at)
                avg_degree = calculate_avg_degree(graph)
                out_put.write(str(avg_degree) + "\n")
            except (KeyError, TypeError, ValueError):
                # Records that are not tweets are skipped without output,
                # e.g. the following sample limit lines:
                # {"limit":{"track":19,"timestamp_ms":"1446218985758"}}
                # Only data-shape errors are swallowed; anything else (for
                # instance a NameError or an I/O failure) now propagates.
                continue
Esempio n. 3
0
    def test_rolling_avg_degree(self):
        """Replay the fixture tweets and check each average degree in order.

        Every tweet in ``self.tweet_list`` is fed through the same steps:
        parse its timestamp, extract hashtags from the cleaned text, update
        ``self.graph``, and record the resulting average degree in
        ``self.avg_degree_list``.
        """
        expected_degrees = [1.0, 2.0, 2.0, 2.0, 1.67]

        for tweet in self.tweet_list:
            timestamp = dtparser.parse(tweet["created_at"])
            _, text = _clean_string(tweet['text'])
            tags = get_hashtag(text)

            # The graph is only extended when the tweet has more than one
            # hashtag.
            if tags and len(tags) > 1:
                self.graph = update_or_build_graph(self.graph, tags, timestamp)

            # shuffle_graph runs for every tweet, whether or not it added
            # hashtags (presumably it expires old edges -- confirm against
            # its definition).
            self.graph = shuffle_graph(self.graph, timestamp)
            self.avg_degree_list.append(calculate_avg_degree(self.graph))

        self.assertEqual(self.avg_degree_list, expected_degrees,
                         'incorrect average degree')
Esempio n. 4
0
    def test_rolling_avg_degree(self):
        """Check the average degree recorded after each fixture tweet.

        Walks ``self.tweet_list`` in order, updating ``self.graph`` and
        appending one average-degree value per tweet to
        ``self.avg_degree_list``, then compares the whole list against the
        expected sequence.
        """
        for record in self.tweet_list:
            when = dtparser.parse(record["created_at"])
            cleaned = _clean_string(record['text'])[1]
            found_tags = get_hashtag(cleaned)

            # A tweet needs more than one hashtag before the graph is
            # updated with it.
            has_multiple_tags = bool(found_tags) and len(found_tags) > 1
            if has_multiple_tags:
                self.graph = update_or_build_graph(
                    self.graph, found_tags, when)

            # Applied on every iteration, independent of the hashtag count.
            self.graph = shuffle_graph(self.graph, when)

            current_degree = calculate_avg_degree(self.graph)
            self.avg_degree_list.append(current_degree)

        self.assertEqual(
            self.avg_degree_list,
            [1.0, 2.0, 2.0, 2.0, 1.67],
            'incorrect average degree',
        )