def social_reconstruction(keyword_list_file, relations_file):
    
    keyword_list = csv_io.read_csv(keyword_list_file)
    relations = json_io.read_json(relations_file)

    relation_graph = {'nodes': [], 'links': []}

    node_index = {}
    index = 0
    for keyword in keyword_list:
        if keyword not in node_index:
            relation_graph['nodes'].append({'name': keyword, 'group': index, 'ID': index})
            node_index[keyword] = index
            index += 1

    for name, relation in relations.iteritems():
        #total = sum(relation.values())
        for person in relation:
            #if total != 0 and (float(relation[person]) / total > (1.0/len(relation)) - 0.03 ):
            relation_graph['links'].append({'source': node_index[name], 'target': node_index[person],
                                           'value': relation[person], 'label': person })
            relation_graph['links'].append({'source': node_index[person], 'target': node_index[name],
                                           'value': relation[person], 'label': name })
    print relation_graph
    json_io.write_json('output/result/relation_graph.json', relation_graph)
def face_recongnition(position_merge_file):
    
    frame_list = json_io.read_json(position_merge_file) 

    # Read face image
    for frame in frame_list:
       img_name = frame_list[frame]['keyword'].encode('utf8') + str(frame_list[frame]['frame_position']) + '.jpg'
       frame_list[frame]['img'] = cv2.imread( OUTPUT_PATH + "img/" + img_name , 0)
   
    # transforamt to keyword as key 
    keyword_list = {}
    for frame in frame_list:
        keyword = frame_list[frame]['keyword']
        face_id = frame_list[frame]['face_id']
        if keyword not in keyword_list:
            keyword_list[keyword] = {}
        
        if face_id not in keyword_list[keyword]:
            keyword_list[keyword][face_id] = []
        keyword_list[keyword][face_id].append(frame_list[frame])
    
    for keyword, frame_list in keyword_list.iteritems():
        print keyword
        for frame in frame_list:
            for face in frame_list[frame]:
                print face['ID'],
            print 

    global detector
    global matcher

    detector, matcher =  cv_face.init_feature('orb')

    threadLock = threading.Lock() 
    thread_count = 0 
    threads = []
    match_rate = {}

    for keyword, frame_list in keyword_list.iteritems():
        thread = Pthread(thread_count, 'Thread-'+str(thread_count), frame_list, threadLock)
        thread.start()
        threads.append(thread)
        thread_count += 1


    # wait all threads complete 
    for thread in threads:
        thread.join()

    for keyword, frame_list in keyword_list.iteritems():
        print keyword
        for frame in frame_list:
            for face in frame_list[frame]:
                if 'img' in face:
                    del face['img']
                    print face['ID'],
            print 

    json_io.write_json('output/face_recongnition.json', keyword_list)
def build_bipartite_graph(keyword_dic_file):
    """Normalise the keyword dictionary and write its single and pair graphs.

    The dictionary loaded from ``keyword_dic_file`` is passed through
    ``weight_normalize``; ``to_pair`` derives the pair graph from the result.
    The pair graph goes to ``OUTPUT_PATH + 'pair_graph.json'`` and the
    normalised dictionary to ``OUTPUT_PATH + 'single_graph.json'``.
    """
    normalized = weight_normalize(json_io.read_json(keyword_dic_file))
    pair_graph = to_pair(normalized)

    outputs = (('pair_graph.json', pair_graph),
               ('single_graph.json', normalized))
    for file_name, graph in outputs:
        json_io.write_json(OUTPUT_PATH + file_name, graph)
def build_bipartite_graph(keyword_dic_file):
    """Write the pair graph and the normalised single graph for a keyword file.

    Args:
        keyword_dic_file: path handed to ``json_io.read_json``.

    Writes ``pair_graph.json`` (result of ``to_pair``) and then
    ``single_graph.json`` (result of ``weight_normalize``) under
    ``OUTPUT_PATH``.
    """
    single_graph = weight_normalize(json_io.read_json(keyword_dic_file))

    pair_target = OUTPUT_PATH + 'pair_graph.json'
    json_io.write_json(pair_target, to_pair(single_graph))

    single_target = OUTPUT_PATH + 'single_graph.json'
    json_io.write_json(single_target, single_graph)
def relationship_minig(min_votes, iter_stop):

    single_graph_file ='scripts/output/single_graph.json'
    pair_graph_file = 'scripts/output/pair_graph.json'
    social_graph_file = 'scripts/output/single_graph.json'
    dir_file = 'scripts/input/dir_rel.json'
    clip_file = 'scripts/input/clip_rel.json'

    single_graph = json_io.read_json(single_graph_file)
    pair_graph = json_io.read_json(pair_graph_file)
    
    bi_graph, social_graph = graph_init(single_graph, pair_graph, social_graph_file, dir_file, clip_file)

    output_graph = {'nodes':[], "links":[]}
    node_dic = {}
    change = True
    itr = 0
   # iterator algorithm1
    while change:
        role_pair, dominant_keyword, votes = bi_graph.dominant_pair()
        if role_pair is None:
            break

        source, target, dir_prob = bi_graph.get_direction(role_pair, dominant_keyword)
        valid_tag = valid_checking(social_graph, source, target, dominant_keyword)
        
        if source not in node_dic:
            node_dic[source] = len(node_dic)
            output_graph['nodes'].append({"group": node_dic[source], "name": source, "ID": node_dic[source]})
        if target not in node_dic: 
            node_dic[target] = len(node_dic)
            output_graph['nodes'].append({"group": node_dic[target], "name": target, "ID": node_dic[target]})

        if valid_tag != False and votes >= int(min_votes):
            if type(valid_tag) != unicode:
                print source, '-->', dominant_keyword, '-->', target
                add_links(output_graph, source, target, dominant_keyword, votes, node_dic)
                social_graph.relationship_tagging(source, target, dominant_keyword, votes)
            else:
                print source, '-->', valid_tag, '-->', target
                add_links(output_graph, source, target, valid_tag, votes, node_dic)
                social_graph.relationship_tagging(source, target, valid_tag, votes)
            print votes, dir_prob
        
        bi_graph.update_weighting(role_pair, dominant_keyword)
        if valid_tag:
            bi_graph.remove_keyword(role_pair, dominant_keyword)
        else:
            bi_graph.remove_edges(role_pair, dominant_keyword)
        itr += 1
        if itr == int(iter_stop):
            break

    json_io.write_json('result/social_graph.json', output_graph)
    social_graph.clear()
    social_graph.shutdown()
def movie_prosessing(movie_file, two_entity_file, search_result_file):
    two_entity_set = json_io.read_json(two_entity_file)
    keyword_search_result = csv_io.read_csv(search_result_file)

    # load video
    videoInput = cv2.VideoCapture(movie_file)

    # crate a start_frame to end_frame dictionary for two_entity_set look up
    start_end = {}
    for row in keyword_search_result:
        start_frame, end_frame = time_format.to_frame(row)
        while start_frame in start_end:
            start_frame = start_frame + 0.001
        while end_frame in start_end:
            end_frame = end_frame + 0.001
        start_end[start_frame] = end_frame 

    frame = {}
    face_count = 0
    for keyword in two_entity_set:
        for start_frame in two_entity_set[keyword]:
            frame_position = int(start_frame) - 24 * 10
            finish_frame = start_end[start_frame] + 24 * 10
            while frame_position <= finish_frame: 
                print keyword
                videoInput.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, frame_position)
                flag, img = videoInput.read()
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                gray = cv2.equalizeHist(gray)
                face_position_list, rects = cv_image.face_detect(gray, frame_position, (85, 85))
                #face_position_list, rects =  faceDetection(gray, frame_position)
                if 0xFF & cv2.waitKey(5) == 27:
                    cv2.destroyAllWindows()
                    sys.exit(1)
                
                if len(face_position_list) == 1:
                    print 'detected'
                    image_name = keyword + str(frame_position) + '.jpg'
                    cv_image.output_image(rects, img, OUTPUT_PATH + '/img/' + image_name)
                    for face_position in face_position_list:
                        face_count += 1
                        print face_count
                        frame[face_count] = { 'keyword' : keyword, 
                                                  'face_position': face_position.tolist(),
                                                  'ID' : face_count,
                                                  'frame_position': frame_position,
                                                  'face_id': face_count} 
                frame_position += FRAME_INTERVAL
    #close video  
    videoInput.release()

    json_io.write_json(OUTPUT_PATH + 'frame.json', frame) 
# Example #7
# 0
def tf(terms, output_path):
    """Compute relative term frequencies and write them as JSON.

    Every distinct term is counted and the count is divided by the total
    number of terms, so the written values sum to 1.0 for non-empty input.

    Args:
        terms: sized iterable of hashable terms.
        output_path: prefix of the output file name.

    The mapping is written to ``output_path + doc + '.json'``.
    """
    term_tf = {}
    for term in terms:
        term_tf[term] = term_tf.get(term, 0.0) + 1.0

    # Store the normalised value back into the mapping; rebinding a loop
    # variable would leave the raw counts untouched.
    length = len(terms)
    for term in term_tf:
        term_tf[term] = term_tf[term] / length

    # NOTE(review): ``doc`` is neither a parameter nor a local of this
    # function; it has to exist at module level -- confirm.
    json_io.write_json(output_path + doc + '.json', term_tf)
def tf(terms, output_path):
    """Write the relative frequency of every term in ``terms`` as JSON.

    Args:
        terms: sized iterable of hashable terms.
        output_path: prefix of the output file name.

    The frequencies (count divided by ``len(terms)``) are written to
    ``output_path + doc + '.json'``.
    """
    term_tf = {}
    for term in terms:
        term_tf[term] = term_tf.get(term, 0.0) + 1.0

    length = len(terms)
    # Assign through the key: dividing only the loop variable would discard
    # the result and leave raw counts in the output.
    for term in term_tf:
        term_tf[term] = term_tf[term] / length

    # NOTE(review): ``doc`` is not defined inside this function; presumably a
    # module-level name -- verify against the caller.
    json_io.write_json(output_path+doc+'.json', term_tf)
def position_merge(frame_file):

    frame = json_io.read_json(frame_file)
    
    keys = frame.keys()
    
    for i in range(0, len(frame)):
        for j in range(i+1, len(frame)):
            if is_near( frame[keys[i]], frame[keys[j]] ):
                frame[keys[j]]['face_id'] = frame[keys[i]]['face_id']
                print 111
    
    json_io.write_json(OUTPUT_PATH + 'merge_position.json', frame)
def init_docs(document_list, doc_foldr='data/'):
    """Create a ``Document`` per entry and persist the name/id look-up tables.

    Ids are assigned consecutively starting at 1, in iteration order.

    Args:
        document_list: iterable of document names.
        doc_foldr: folder passed through to every ``Document``.

    Returns:
        The list of created ``Document`` objects.

    Side effects:
        Writes 'output/doc_hash.json' (name -> id) and
        'output/id_hash.json' (id -> name).
    """
    documents = []
    doc_hash = {}
    id_hash = {}
    for doc_id, doc in enumerate(document_list, 1):
        documents.append(Document(doc_id, doc, doc_foldr))
        doc_hash[doc] = doc_id
        id_hash[doc_id] = doc

    json_io.write_json('output/doc_hash.json', doc_hash)
    json_io.write_json('output/id_hash.json', id_hash)
    return documents
def init_docs(document_list, doc_foldr='data/'):
    """Build ``Document`` objects and write the id look-up tables.

    Args:
        document_list: iterable of document names.
        doc_foldr: folder passed through to every ``Document``.

    Returns:
        list of ``Document`` objects, numbered from 1 in iteration order.

    The name -> id table goes to 'output/doc_hash.json', the id -> name
    table to 'output/id_hash.json'.
    """
    documents, name_to_id, id_to_name = [], {}, {}

    for number, name in enumerate(document_list, 1):
        documents.append(Document(number, name, doc_foldr))
        name_to_id[name] = number
        id_to_name[number] = name

    for target, table in (('output/doc_hash.json', name_to_id),
                          ('output/id_hash.json', id_to_name)):
        json_io.write_json(target, table)
    return documents
def find_relation(keyword_list_file, search_result_file, time_interval):
 
    time_to_keyword = csv_io.read_csv(search_result_file)
    keyword_list = csv_io.read_csv(keyword_list_file)
    leading_keyword = keyword_list[0]
    
    frame_to_keyword = {}
    for row in time_to_keyword:
        start_frame, end_frame = time_format.to_frame(row)
        while start_frame in frame_to_keyword:
            start_frame = start_frame + 0.001
        while end_frame in frame_to_keyword:
            end_frame = end_frame + 0.001
        frame_to_keyword[start_frame] = row[1]
        
    # Transfrom to timeline format
    frame_list = frame_to_keyword.keys()
    frame_list.sort()
    
    relations = {}
    for i in range(1, len(keyword_list)):
        relations.update( {keyword_list[i] : count_ralation(keyword_list[i], frame_list, frame_to_keyword, time_interval)} )

    count = 0
    proper_relation = {}
    for name, relation in relations.iteritems():
        total = sum(relation.values())
        proper_relation[name] = {}
        print name, 
        for person in relation:
            if proper_test(total, leading_keyword, person, relation):
                proper_relation[name][person] = relation[person]
                print person , relation[person],
                count += 1
        print


    print str(time_interval/(24*60)) + ',' + str(count)
    json_io.write_json('output/relations.json', proper_relation)
# Example #13
# 0
        pages_dic[movie_title] = curr_count

    return pages, graph

def get_movie_title(page):
    """Parse ``page`` and return ``(soup, movie_title)``.

    The title is the text of the ``span.itemprop`` element found inside the
    ``h1.header`` element.

    Raises:
        AttributeError: if either element is missing from the page.
    """
    soup = BeautifulSoup(page)

    header = soup.find('h1', class_='header')
    title_span = header.find('span', class_='itemprop')

    return soup, title_span.get_text()

def http_get(url):
    """Fetch ``url`` and return the response body.

    The response object is always closed, also when reading fails, so the
    underlying connection is not left open.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()

if __name__=='__main__':
    # Usage: <script> <file>, where <file> is read line by line and the
    # lines are handed to movie_graph().
    if len(sys.argv) != 2:
        print >> sys.stderr, "Usage: <file>"
        # sys.exit instead of the interactive-only ``exit`` helper
        sys.exit(-1)

    # Read everything up front so the file is closed before the crawl starts
    # and cannot leak when movie_graph() raises.
    with open(sys.argv[1]) as f:
        urls = f.readlines()

    pages, graph = movie_graph(urls)

    json_io.write_json('dataset/imdb.json', pages)
    json_io.write_json('dataset/graph_9.json', graph)
def video_processing(movie_file, role_list_file, search_result_file,
                     role_input_way):

    # load frame-keyword files
    keyword_search_result = csv_io.read_csv(search_result_file)
    role_list = csv_io.read_csv(role_list_file)

    # load video
    videoInput = cv2.VideoCapture(movie_file)

    frame = {}
    keyword_id = 0
    frame_number = 0
    for row in keyword_search_result:

        start_frame, end_frame, keyword = float(row[0]), float(row[1]), row[2]
        frame_position = round(start_frame) + 24 * BACKWORD_EXPAND_TIME
        finish_frame = round(end_frame) + 24 * FORWORD_EXPAND_TIME

        keyword_id += 1
        keyword_time = keyword + '_t' + str(keyword_id)
        while frame_position <= finish_frame:

            face_position_list, rects, img = frame_caputre(
                videoInput, frame_position)

            if len(face_position_list) >= 1:
                print "detect face..."

                image_name = OUTPUT_PATH + 'img/' + keyword_time + str(
                    frame_number)
                #if role_input_way == 0:
                cv_image.output_image(rects, img, image_name)

                count = 0
                for face_position in face_position_list:

                    if role_input_way == 'auto':
                        break
                        role_name = role_identify(
                            image_name + '-' + str(count) + '.jpg', role_list)
                    else:
                        role_name = role_input(role_list)
                    count += 1
                    if role_name == -1:
                        continue
                    else:
                        if keyword_time not in frame:
                            print keyword_time, role_name
                            frame[keyword_time] = {}
                            frame[keyword_time][role_name] = {
                                'keyword': keyword,
                                'face_position': face_position.tolist(),
                                'frame_position': frame_position,
                                'keyword_id': keyword_id,
                                'weight': 1,
                                'speaker': True
                            }
                        elif role_name in frame[keyword_time]:
                            frame[keyword_time][role_name]['weight'] += 1
                        else:
                            frame[keyword_time][role_name] = {
                                'keyword': keyword,
                                'face_position': face_position.tolist(),
                                'frame_position': frame_position,
                                'keyword_id': keyword_id,
                                'weight': 1,
                                'speaker': False
                            }
            frame_number += 1
            frame_position += FRAME_INTERVAL

    #close video
    videoInput.release()

    json_io.write_json(OUTPUT_PATH + 'keywordt_roles.json', frame)
            if j > 0:
                graph[node].append(node - 1)
            if j < (grid_size):
                graph[node].append(node + 1)
            node += 1
    return graph

def rewire_edge(graph, prob):
    """Randomly rewire one outgoing edge per node, in place.

    With probability ``prob`` a node drops one randomly picked outgoing edge
    and gains an edge to a random node it is not yet linked to (never
    itself).  Nodes are expected to be labelled ``1 .. len(graph)``.

    Nodes without outgoing edges, and nodes already linked to every other
    node, are left untouched: there is nothing to drop or no valid target,
    and searching for one would never terminate.

    Args:
        graph: dict mapping node -> list of target nodes; modified in place.
        prob: rewiring probability per node, between 0 and 1.

    Returns:
        The same ``graph`` object.
    """
    node_count = len(graph)
    for node, out_nodes in graph.items():
        if random.uniform(0, 1) < prob:
            if not out_nodes:
                continue

            candidates = [other for other in range(1, node_count + 1)
                          if other != node and other not in out_nodes]
            if not candidates:
                continue

            pick = random.randint(0, len(out_nodes) - 1)
            # append first, then drop the picked (older) edge by index
            out_nodes.append(random.choice(candidates))
            out_nodes.pop(pick)
    return graph

if __name__=='__main__':

    p1 = 0.2
    p2 = 0.8

    lp_graph = get_lp_graph(4)

    # rewire_edge() modifies the adjacency lists in place and dict.copy() is
    # shallow, so each run needs its own copy of every list; otherwise both
    # results share the same lists and end up identical.
    rewire_graph1 = rewire_edge(
        dict((node, list(out_nodes)) for node, out_nodes in lp_graph.items()), p1)
    rewire_graph2 = rewire_edge(
        dict((node, list(out_nodes)) for node, out_nodes in lp_graph.items()), p2)
    json_io.write_json('dataset/graph_7.json', rewire_graph1)
    json_io.write_json('dataset/graph_8.json', rewire_graph2)
# Example #16
# 0
            if j < (grid_size):
                graph[node].append(node + 1)
            node += 1
    return graph


def rewire_edge(graph, prob):
    """Rewire, with probability ``prob`` per node, one outgoing edge in place.

    A selected node loses one randomly picked outgoing edge and gains an edge
    to a random node (labels ``1 .. len(graph)``) that is neither itself nor
    already a target.

    A node is skipped when it has no outgoing edge to replace or when every
    other node is already a target, because no valid new edge exists and a
    retry loop would never end.

    Args:
        graph: dict mapping node -> list of target nodes; modified in place.
        prob: rewiring probability per node, between 0 and 1.

    Returns:
        The same ``graph`` object.
    """
    labels = range(1, len(graph) + 1)
    for node, out_nodes in graph.items():
        if not random.uniform(0, 1) < prob:
            continue
        if not out_nodes:
            continue

        free_targets = [label for label in labels
                        if label != node and label not in out_nodes]
        if not free_targets:
            continue

        pick = random.randint(0, len(out_nodes) - 1)
        # add the new edge before removing the picked one by index
        out_nodes.append(random.choice(free_targets))
        out_nodes.pop(pick)
    return graph


if __name__ == '__main__':

    p1 = 0.2
    p2 = 0.8

    lp_graph = get_lp_graph(4)

    # dict.copy() would only copy the outer dict; rewire_edge() appends to and
    # pops from the adjacency lists themselves, so every run gets fresh lists
    # to keep the two results (and lp_graph) independent of each other.
    graph_for_p1 = dict((node, list(out_nodes)) for node, out_nodes in lp_graph.items())
    graph_for_p2 = dict((node, list(out_nodes)) for node, out_nodes in lp_graph.items())

    rewire_graph1 = rewire_edge(graph_for_p1, p1)
    rewire_graph2 = rewire_edge(graph_for_p2, p2)
    json_io.write_json('dataset/graph_7.json', rewire_graph1)
    json_io.write_json('dataset/graph_8.json', rewire_graph2)
def video_processing(movie_file, role_list_file, search_result_file, role_input_way):

    # load frame-keyword files
    keyword_search_result = csv_io.read_csv(search_result_file)
    role_list = csv_io.read_csv(role_list_file)

    # load video
    videoInput = cv2.VideoCapture(movie_file)

    frame = {}
    keyword_id = 0
    frame_number = 0
    for row in keyword_search_result:

        start_frame, end_frame, keyword = float(row[0]), float(row[1]), row[2]
        frame_position = round(start_frame) + 24 * BACKWORD_EXPAND_TIME
        finish_frame = round(end_frame) + 24 * FORWORD_EXPAND_TIME
        
        keyword_id += 1
        keyword_time = keyword + '_t' + str(keyword_id)
        while frame_position <= finish_frame: 

            face_position_list, rects, img = frame_caputre(videoInput, frame_position)
            
            if len(face_position_list) >= 1:
                print "detect face..."
                
                image_name = OUTPUT_PATH + 'img/' + keyword_time + str(frame_number) 
                #if role_input_way == 0:
                cv_image.output_image(rects, img, image_name)
              
                count = 0
                for face_position in face_position_list:
                    
                    if role_input_way == 'auto':
                        break
                        role_name = role_identify( image_name + '-' + str(count) + '.jpg', role_list)
                    else:
                        role_name = role_input(role_list)
                    count += 1
                    if role_name == -1:
                        continue
                    else:
                        if keyword_time not in frame:
                            print keyword_time, role_name
                            frame[keyword_time] = {}
                            frame[keyword_time][role_name] = {'keyword' : keyword, 
                                                              'face_position' : face_position.tolist(),
                                                              'frame_position' : frame_position,
                                                              'keyword_id' : keyword_id,
                                                              'weight' : 1,
                                                              'speaker': True} 
                        elif role_name in frame[keyword_time]:
                            frame[keyword_time][role_name]['weight'] += 1
                        else:
                            frame[keyword_time][role_name] = {'keyword' : keyword, 
                                                              'face_position' : face_position.tolist(),
                                                              'frame_position' : frame_position,
                                                              'keyword_id' : keyword_id,
                                                              'weight' : 1,
                                                              'speaker': False} 
            frame_number += 1
            frame_position += FRAME_INTERVAL

    #close video  
    videoInput.release()

    json_io.write_json(OUTPUT_PATH + 'keywordt_roles.json', frame) 
# Example #18
# 0
def relationship_minig(min_votes, iter_stop):

    single_graph_file = 'scripts/output/single_graph.json'
    pair_graph_file = 'scripts/output/pair_graph.json'
    social_graph_file = 'scripts/output/single_graph.json'
    dir_file = 'scripts/input/dir_rel.json'
    clip_file = 'scripts/input/clip_rel.json'

    single_graph = json_io.read_json(single_graph_file)
    pair_graph = json_io.read_json(pair_graph_file)

    bi_graph, social_graph = graph_init(single_graph, pair_graph,
                                        social_graph_file, dir_file, clip_file)

    output_graph = {'nodes': [], "links": []}
    node_dic = {}
    change = True
    itr = 0
    # iterator algorithm1
    while change:
        role_pair, dominant_keyword, votes = bi_graph.dominant_pair()
        if role_pair is None:
            break

        source, target, dir_prob = bi_graph.get_direction(
            role_pair, dominant_keyword)
        valid_tag = valid_checking(social_graph, source, target,
                                   dominant_keyword)

        if source not in node_dic:
            node_dic[source] = len(node_dic)
            output_graph['nodes'].append({
                "group": node_dic[source],
                "name": source,
                "ID": node_dic[source]
            })
        if target not in node_dic:
            node_dic[target] = len(node_dic)
            output_graph['nodes'].append({
                "group": node_dic[target],
                "name": target,
                "ID": node_dic[target]
            })

        if valid_tag != False and votes >= int(min_votes):
            if type(valid_tag) != unicode:
                print source, '-->', dominant_keyword, '-->', target
                add_links(output_graph, source, target, dominant_keyword,
                          votes, node_dic)
                social_graph.relationship_tagging(source, target,
                                                  dominant_keyword, votes)
            else:
                print source, '-->', valid_tag, '-->', target
                add_links(output_graph, source, target, valid_tag, votes,
                          node_dic)
                social_graph.relationship_tagging(source, target, valid_tag,
                                                  votes)
            print votes, dir_prob

        bi_graph.update_weighting(role_pair, dominant_keyword)
        if valid_tag:
            bi_graph.remove_keyword(role_pair, dominant_keyword)
        else:
            bi_graph.remove_edges(role_pair, dominant_keyword)
        itr += 1
        if itr == int(iter_stop):
            break

    json_io.write_json('result/social_graph.json', output_graph)
    social_graph.clear()
    social_graph.shutdown()
# Example #19
# 0
                nodes.append(str(i))
            
        graph = Graph(graph, nodes)

    s_rank = datetime.datetime.now()
    rank = page_rank(graph, 20, 0.85)
    e_rank = datetime.datetime.now()

    s_hits = datetime.datetime.now()
    auth, hubs = hits(graph, 20)
    sorted_auth = sorted(auth.items(), key=operator.itemgetter(1))
    sorted_hubs = sorted(hubs.items(), key=operator.itemgetter(1))
    e_hits = datetime.datetime.now()
    print rank
    print auth
    print hubs

    output_path = 'dist/' + sys.argv[1].split('/')[1][:-4] 
    if sys.argv[1][-4:] != 'json':
        sim = sim_rank(graph)
        np.savetxt(output_path + '_sim_rank', sim, fmt='%.2e')
        f.close()

    json_io.write_json(output_path + '_rank.json', rank)
    json_io.write_json(output_path + '_auth.json', auth)
    json_io.write_json(output_path + '_hubs.json', hubs)
    t_rank =  e_rank - s_rank
    t_hits = e_hits - s_hits
    print t_rank.microseconds
    print t_hits.microseconds