import json

from dateutil import parser as dtparser

# _clean_string, get_hashtag, update_or_build_graph, shuffle_graph and
# calculate_avg_degree are defined elsewhere in this project.


def main(argv):
    inp_file, out_file = argv
    graph = {}
    with open(inp_file) as input_file, open(out_file, 'w') as out_put:
        for line in input_file:
            line = line.rstrip()
            line_json = json.loads(line)
            try:
                created_at = dtparser.parse(line_json["created_at"])
                clean, cleaned_line = _clean_string(line_json['text'])
                hash_tags = get_hashtag(cleaned_line)
                # update the graph only if the tweet has more than one hashtag
                if hash_tags and len(hash_tags) > 1:
                    graph = update_or_build_graph(graph, hash_tags, created_at)
                # remove edges created more than 60 seconds before this tweet,
                # then write the rolling average degree for every input line
                graph = shuffle_graph(graph, created_at)
                avg_degree = calculate_avg_degree(graph)
                out_put.write(str(avg_degree) + "\n")
            except Exception:
                # skip rate-limit records, which have no "created_at" or
                # "text" fields, e.g.:
                # {"limit":{"track":19,"timestamp_ms":"1446218985758"}}
                pass
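# --- Hypothetical helper sketches (illustration only, not the actual repo
# implementations). They assume the graph is a dict mapping each hashtag to
# {neighbor: last_edge_timestamp}; the real data structure may differ.
from datetime import timedelta
from itertools import combinations


def update_or_build_graph(graph, hash_tags, created_at):
    """Sketch: connect every pair of hashtags that co-occur in one tweet,
    stamping each edge with the tweet's creation time."""
    for a, b in combinations(set(hash_tags), 2):
        graph.setdefault(a, {})[b] = created_at
        graph.setdefault(b, {})[a] = created_at
    return graph


def shuffle_graph(graph, created_at):
    """Sketch: evict edges older than the 60-second rolling window and drop
    any nodes left without neighbors."""
    window = timedelta(seconds=60)
    for node in list(graph):
        graph[node] = {nbr: ts for nbr, ts in graph[node].items()
                       if created_at - ts <= window}
        if not graph[node]:
            del graph[node]
    return graph


def calculate_avg_degree(graph):
    """Sketch: average degree = total degree / number of nodes, rounded to
    two decimals to match the expected test output (e.g. 1.67)."""
    if not graph:
        return 0.0
    total_degree = sum(len(nbrs) for nbrs in graph.values())
    return round(float(total_degree) / len(graph), 2)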
def test_rolling_avg_degree(self):
    for tweet in self.tweet_list:
        created_at = dtparser.parse(tweet["created_at"])
        clean, cleaned_line = _clean_string(tweet['text'])
        hash_tags = get_hashtag(cleaned_line)
        # update the graph only if the tweet has more than one hashtag
        if hash_tags and len(hash_tags) > 1:
            self.graph = update_or_build_graph(
                self.graph, hash_tags, created_at)
        # drop edges older than 60 seconds, then record the rolling average
        self.graph = shuffle_graph(self.graph, created_at)
        self.avg_degree_list.append(calculate_avg_degree(self.graph))
    self.assertEqual(self.avg_degree_list,
                     [1.0, 2.0, 2.0, 2.0, 1.67],
                     'incorrect average degree')
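# --- Hypothetical test scaffolding (illustration only) ---
# A sketch of the setUp the test above assumes: an empty graph, an empty
# result list, and a list of tweet dicts with "created_at" and "text" fields.
# The class name and fixture path are illustrative, not taken from the repo.
import json
import unittest

from dateutil import parser as dtparser


class RollingAvgDegreeTest(unittest.TestCase):

    def setUp(self):
        self.graph = {}
        self.avg_degree_list = []
        # each fixture line is one tweet object in JSON form
        with open('tweets_fixture.txt') as fixture:
            self.tweet_list = [json.loads(line) for line in fixture]


if __name__ == '__main__':
    unittest.main()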