コード例 #1
0
ファイル: mergeUtils.py プロジェクト: daleathan/gedMerge
def checkFam(wid,mid):
  fams = set()
  for role in ('husb', 'wife', 'children'):
    tFam = common.config['families'].find({role: wid}, {'_id': 1, 'marriage.date': 1} )
    rFam = common.config['match_families'].find({role: mid}, {'_id': 1, 'marriage.date': 1} )
    tfams = []
    tDone = []
#take all combinations of families
#if several posibilities for 1 work-family keep only the the pair where marriage-date matches
    for f in tFam:
        for ff in rFam:
            try:
                if len(f['marriage']['date'])>4 and f['marriage']['date'] == ff['marriage']['date']:
                    fams.add((f['_id'], ff['_id']))
                    tDone.append(f['_id'])
                else:
                    tfams.append([f['_id'], f['marriage']['date'], ff['_id'], ff['marriage']['date']])
            except:
                fams.add((f['_id'], ff['_id']))
    for l in tfams:
        if l[0] not in tDone:
            fams.add((l[0], l[2]))
#?#
    for (tFamId,rFamId) in fams:    #  for all involved families
        print 'checking',tFamId,rFamId
        famMatchData = matchFam(tFamId, rFamId, config)
        if common.config['fam_matches'].find({'workid': tFamId, 'matchid': rFamId}).count() == 0:
            if famMatchData['status'] in common.statOK.union(common.statManuell):
                #fam_matches.insert(famMatchData)
                print 'NY',famMatchData['workRefId'],famMatchData['workRefId'],famMatchData['status']
コード例 #2
0
ファイル: featureSet.py プロジェクト: andersardo/gedMerge
def famBaseline(work, match, config):
    """Build the baseline feature vector for a (work family, match family) pair.

    work, match -- family documents; each must provide '_id' and may provide
                   'marriage' with 'date' and 'place'.
    config      -- mapping giving access to the 'fam_matches', 'persons' and
                   'match_persons' collections.

    Returns None when either family is missing/empty.  Otherwise returns a
    list of ten values, in this order: family similarity; green, yellow and
    red parent scores (0.5 per partner); green, yellow, red and white child
    fractions; marriage date similarity; marriage place similarity.  Any
    None among them is replaced by 0.0.
    """
    if not (work and match):
        return None

    # Use the stored family comparison when there is one, else compute it now.
    fmatch = config['fam_matches'].find_one({'workid': work['_id'], 'matchid': match['_id']})
    if not fmatch:
        from utils import matchFam
        fmatch = matchFam(work['_id'], match['_id'], config)

    # overall family similarity
    features = [familySim(work, config['persons'], match, config['match_persons'])]

    # parents: each partner contributes 0.5 to the colour of its status
    parentScore = {'green': 0.0, 'yellow': 0.0, 'red': 0.0}
    for partner in ('husb', 'wife'):
        try:
            partnerStatus = fmatch[partner]['status']
            if partnerStatus in common.statOK:
                parentScore['green'] += 0.5
            elif partnerStatus in common.statManuell:
                parentScore['yellow'] += 0.5
            elif partnerStatus in common.statEjOK:
                parentScore['red'] += 0.5
        except:
            # partner entry absent or unusable: contributes nothing
            pass
    features.extend([parentScore['green'], parentScore['yellow'], parentScore['red']])

    # children: fraction of children per status colour
    childCount = {'green': 0, 'yellow': 0, 'red': 0, 'white': 0}
    nChildren = 0.0
    for child in fmatch['children']:
        nChildren += 1.0
        childStatus = child['status']
        if childStatus in common.statOK:
            childCount['green'] += 1
        elif childStatus in common.statManuell:
            childCount['yellow'] += 1
        elif childStatus in common.statEjOK:
            childCount['red'] += 1
        elif childStatus == "":
            childCount['white'] += 1
    divisor = nChildren or 1.0  # no children: divide by 1 instead of 0
    for colour in ('green', 'yellow', 'red', 'white'):
        features.append(float(childCount[colour]) / divisor)

    # marriage date similarity, falling back to the "nothing known" score
    try:
        features.append(dateSim(work['marriage']['date'], match['marriage']['date']))
    except:
        features.append(dateSim(None, None))

    # marriage place similarity, same fallback
    try:
        features.append(strSim(work['marriage']['place'], match['marriage']['place']))
    except:
        features.append(strSim(None, None))

    return [value if value is not None else 0.0 for value in features]
コード例 #3
0
        for f in tFam:
            for ff in rFam:
                try:
                    if len(f['marriage']['date'])>4 and f['marriage']['date'] == ff['marriage']['date']:
                        fams.add((f['_id'], ff['_id']))
                        tDone.append(f['_id'])
                    else:
                        tfams.append([f['_id'], f['marriage']['date'], ff['_id'], ff['marriage']['date']])
                except:
                    fams.add((f['_id'], ff['_id']))
        for l in tfams:
            if l[0] not in tDone:
                fams.add((l[0], l[2]))
# Compare every collected (work family, match family) pair and store each result.
# famMatchSummary is only referenced by the commented-out line in the loop below.
famMatchSummary = {}
for (tFamId,rFamId) in fams:    #  for all involved families
    famMatchData = matchFam(tFamId, rFamId, config)
    #famMatchSummary[(tFamId,rFamId)] = famMatchData['summary']
    fam_matches.insert(famMatchData)
    ant += 1  # counts inserted matchings; NOTE(review): initialised outside this excerpt
logging.info('%d family matchings inserted', ant)
logging.info('Time %s',time.time() - t0)  # t0 is set outside this excerpt

#############################################################
# Stop here when noFamSVM is set; everything below is the SVM family pass.
if noFamSVM: #default
    logging.info('Matching All done')
    sys.exit()
logging.info('Doing SVM family match, incl split-ifying')

from uiUtils import nameDiff, eventDiff
from utils import updateFamMatch
#USE famfeatureSet!!
コード例 #4
0
ファイル: uiUtils.py プロジェクト: wroldwiedbwe/gedMerge
def personView(wid, mid):
    """Collect display rows for person matches together with a related family match.

    wid -- work person id, or a falsy value to leave it unrestricted
    mid -- match person id, or a falsy value to leave it unrestricted

    Match selection:
      * wid and mid: matches for exactly that pair
      * only wid:    every match of that work person
      * only mid:    matches of that match person whose status is in
                     common.statOK or common.statManuell
      * neither:     nothing

    Returns a list with one tuple per person match:
    (person row, work id as str, match id as str, family table,
     family-match workid or None, family-match matchid or None).
    """
    res = []
    if wid and mid:
        matches = common.config['matches'].find({'workid': wid, 'matchid': mid})
    elif wid:
        # All statuses on purpose; an earlier variant restricted this to
        # statOK/statManuell like the mid-only branch below.
        matches = common.config['matches'].find({'workid': wid})
    elif mid:
        # multilista => only statOK and statManuell
        matches = common.config['matches'].find({
            '$and': [
                {'matchid': mid},
                {'status': {'$in': list(common.statOK.union(common.statManuell))}},
            ]
        })
    else:
        matches = []

    for pmatch in matches:
        #FIX Filter om match-status
        prow = persMatchDisp('Person', pmatch)
        matchId = pmatch['matchid']
        workId = str(pmatch['workid'])
        ftab = []
        fmatch = None

        # 1) A stored family match where this person pair appears as child,
        #    husband or wife (first hit wins, in that order).
        for role in ('children', 'husb', 'wife'):
            fmatch = common.config['fam_matches'].find_one({
                role + '.pwork._id': workId,
                role + '.pmatch._id': matchId,
            })
            if fmatch:
                ftab = famDisp(None, None, fmatch)
                break
        else:
            # 2) Nothing stored: find the family of each person through the
            #    relation collections and compare those families on the fly.
            for relTyp in ('child', 'husb', 'wife'):
                wfamid = common.config['relations'].find_one({
                    'relTyp': relTyp,
                    'persId': workId,
                })
                # The match-side relation must be looked up with the match
                # person's id; querying it with the work id cannot find the
                # match person's family.
                mfamid = common.config['match_relations'].find_one({
                    'relTyp': relTyp,
                    'persId': matchId,
                })
                if wfamid and mfamid:
                    try:
                        fmatch = matchFam(wfamid['famId'], mfamid['famId'],
                                          common.config)
                        ftab = famDisp(None, None, fmatch)
                    except Exception:
                        # best effort: the person row is still shown without
                        # (or with partial) family information
                        pass
                    break

        if fmatch:
            res.append((prow, str(pmatch['workid']), str(pmatch['matchid']),
                        ftab, fmatch['workid'], fmatch['matchid']))
        else:
            res.append((prow, str(pmatch['workid']), str(pmatch['matchid']),
                        ftab, None, None))
    return res
コード例 #5
0
ファイル: SVMfeatures.py プロジェクト: wroldwiedbwe/gedMerge
def svmFamily(work, match, config):
    """Compute the SVM feature vector for a (work family, match family) pair.

    work, match -- family documents; each must provide '_id' and may provide
                   'marriage' with 'date' and 'place'.
    config      -- mapping giving access to the 'fam_matches', 'persons' and
                   'match_persons' collections.

    Returns None when either family is missing/empty; otherwise the result
    of cleanupVect() applied to ten features: family similarity, three
    parent scores (green/yellow/red), four child fractions
    (green/yellow/red/white), marriage date similarity and marriage place
    similarity.
    """
    if not work:
        return None
    if not match:
        return None

    # Stored comparison if present, otherwise compute one on the fly.
    lookup = {'workid': work['_id'], 'matchid': match['_id']}
    fmatch = config['fam_matches'].find_one(lookup)
    logging.debug('fmatch=%s', fmatch)
    if not fmatch:
        from utils import matchFam
        fmatch = matchFam(work['_id'], match['_id'], config)
        logging.debug('fmatch=%s', fmatch)

    features = [
        familySim(work, config['persons'], match, config['match_persons'])
    ]

    # Parent scores, ordered green, yellow, red; 0.5 per partner.
    # The lookup key is the loop variable, not the literal string 'partner'.
    parentScores = [0.0, 0.0, 0.0]
    for partner in ('husb', 'wife'):
        try:
            partnerStatus = fmatch[partner]['status']
            if partnerStatus in common.statOK:
                parentScores[0] += 0.5
            elif partnerStatus in common.statManuell:
                parentScores[1] += 0.5
            elif partnerStatus in common.statEjOK:
                parentScores[2] += 0.5
        except:
            # partner entry absent or unusable: contributes nothing
            pass
    features.extend(parentScores)

    # Child fractions per status colour.
    chstat = defaultdict(int)
    antch = 0.0
    for child in fmatch['children']:
        antch += 1.0
        childStatus = child['status']
        if childStatus in common.statOK:
            chstat['green'] += 1
        elif childStatus in common.statManuell:
            chstat['yellow'] += 1
        elif childStatus in common.statEjOK:
            chstat['red'] += 1
        elif childStatus == "":
            chstat['white'] += 1
        logging.debug('in loop %s %s', child['status'], chstat)
    logging.debug('fmatch=%s, antch=%s, chstat=%s', len(fmatch['children']),
                  antch, chstat)
    if antch == 0:
        antch = 1.0  # no children: avoid division by 0
    for colour in ('green', 'yellow', 'red', 'white'):
        features.append(float(chstat[colour]) / antch)

    # Marriage date similarity, with the "nothing known" score as fallback.
    try:
        dateScore = dateSim(work['marriage']['date'], match['marriage']['date'])
        features.append(dateScore)
    except:
        features.append(dateSim(None, None))

    # Marriage place similarity, same fallback.
    try:
        placeScore = strSim(work['marriage']['place'], match['marriage']['place'])
        features.append(placeScore)
    except:
        features.append(strSim(None, None))

    return cleanupVect(features)
コード例 #6
0
ファイル: uiUtils.py プロジェクト: andersardo/gedMerge
def personView(wid, mid):
    """Collect display rows for person matches together with a related family match.

    wid -- work person id, or a falsy value to leave it unrestricted
    mid -- match person id, or a falsy value to leave it unrestricted

    Match selection:
      * wid and mid: matches for exactly that pair
      * only wid:    every match of that work person
      * only mid:    matches of that match person whose status is in
                     common.statOK or common.statManuell
      * neither:     nothing

    Returns a list with one tuple per person match:
    (person row, work id as str, match id as str, family table,
     family-match workid or None, family-match matchid or None).
    """
    res = []
    if wid and mid:
        matches = common.config['matches'].find({'workid': wid, 'matchid': mid})
    elif wid:
        # All statuses on purpose; an earlier variant restricted this to
        # statOK/statManuell like the mid-only branch below.
        matches = common.config['matches'].find({'workid': wid})
    elif mid:
        # multilista => only statOK and statManuell
        matches = common.config['matches'].find({'$and': [{'matchid': mid},
                {'status': {'$in': list(common.statOK.union(common.statManuell))}}]})
    else:
        matches = []

    for pmatch in matches:
        #FIX Filter om match-status
        prow = persMatchDisp('Person', pmatch)
        matchId = pmatch['matchid']
        workId = str(pmatch['workid'])
        ftab = []
        fmatch = None

        # 1) A stored family match where this person pair appears as child,
        #    husband or wife (first hit wins, in that order).
        for role in ('children', 'husb', 'wife'):
            fmatch = common.config['fam_matches'].find_one(
                {role + '.pwork._id': workId, role + '.pmatch._id': matchId})
            if fmatch:
                ftab = famDisp(None, None, fmatch)
                break
        else:
            # 2) Nothing stored: find the family of each person through the
            #    relation collections and compare those families on the fly.
            for relTyp in ('child', 'husb', 'wife'):
                wfamid = common.config['relations'].find_one({'relTyp': relTyp, 'persId': workId})
                # The match-side relation must be looked up with the match
                # person's id; querying it with the work id cannot find the
                # match person's family.
                mfamid = common.config['match_relations'].find_one({'relTyp': relTyp, 'persId': matchId})
                if wfamid and mfamid:
                    try:
                        fmatch = matchFam(wfamid['famId'], mfamid['famId'], common.config)
                        ftab = famDisp(None, None, fmatch)
                    except Exception:
                        # best effort: the person row is still shown without
                        # (or with partial) family information
                        pass
                    break

        if fmatch:
            res.append((prow, str(pmatch['workid']), str(pmatch['matchid']),
                        ftab, fmatch['workid'], fmatch['matchid']))
        else:
            res.append((prow, str(pmatch['workid']), str(pmatch['matchid']), ftab, None, None))
    return res