Ejemplo n.º 1
0
    return f

# (0) decode command-line
# Exactly two positional arguments are expected; any other count makes the
# tuple unpacking below raise ValueError.
configfile, testfilename = sys.argv[1:]

# (1) load configuration
config = ConfigLoader.load(configfile)
# Linking mode is inferred from getDataSources() (no argument) returning an
# empty collection.
linking = config.getDataSources().isEmpty()
# Lower limit: 0.0 when linking, 0.4 otherwise.
lowlimit = 0.0 if linking else 0.4

# (2) index up all the data
processor = Processor(config)
database = processor.getDatabase()
# NOTE(review): 40000 is presumably the indexing batch size -- confirm
# against Processor.index.
if linking:
    # Only getDataSources(1) is indexed in linking mode (presumably the
    # first source group -- confirm against the configuration API).
    processor.index(config.getDataSources(1), 40000)
else:
    processor.index(config.getDataSources(), 40000)
# (3) actual genetic stuff
pkg = "no.priv.garshol.duke.comparators."
comparators = ["DiceCoefficientComparator",
               "DifferentComparator",
               "ExactComparator",
               "JaroWinkler",
               "JaroWinklerTokenized",
               "Levenshtein",
               "NumericComparator",
               "PersonNameComparator",
Ejemplo n.º 2
0
else:
    golddb = None
    
# (1) load configuration
config = ConfigLoader.load(configfile)
# Full slice of the property list (presumably to obtain an independent
# copy -- confirm how the returned list type handles slicing).
properties = config.getProperties()[:]
idprops = config.getIdentityProperties()
# Linking mode is simply the opposite of deduplication mode.
linking = not config.isDeduplicationMode()
# Lower limit: 0.0 when linking, 0.4 otherwise.
lowlimit = 0.0 if linking else 0.4

# (2) index up all the data
processor = Processor(config)
alldb = processor.getDatabase()
# NOTE(review): 40000 is presumably the indexing batch size -- confirm
# against Processor.index.
if linking:
    # In linking mode both getDataSources(1) and getDataSources(2) are
    # indexed, in that order.
    processor.index(config.getDataSources(1), 40000)
    processor.index(config.getDataSources(2), 40000)
else:
    processor.index(config.getDataSources(), 40000)

if linking:
    config.setPath((config.getPath() or '/tmp/duke-active-ix-') + '2') # AHEM...
    processor = Processor(config)
    database = processor.getDatabase()
    if not linking:
        processor.index(config.getDataSources(), 40000)
    else:
        processor.index(config.getDataSources(1), 40000)
else:
Ejemplo n.º 3
0
else:
    golddb = None

# (1) load configuration
config = ConfigLoader.load(configfile)
# Full slice of the property list (presumably to obtain an independent
# copy -- confirm how the returned list type handles slicing).
properties = config.getProperties()[:]
idprops = config.getIdentityProperties()
# Linking mode is simply the opposite of deduplication mode.
linking = not config.isDeduplicationMode()
# Lower limit: 0.0 when linking, 0.4 otherwise.
lowlimit = 0.0 if linking else 0.4

# (2) index up all the data
processor = Processor(config)
alldb = processor.getDatabase()
# NOTE(review): 40000 is presumably the indexing batch size -- confirm
# against Processor.index.
if linking:
    # In linking mode both getDataSources(1) and getDataSources(2) are
    # indexed, in that order.
    processor.index(config.getDataSources(1), 40000)
    processor.index(config.getDataSources(2), 40000)
else:
    processor.index(config.getDataSources(), 40000)

if linking:
    # Build a second processor/database under a different path: the configured
    # path (or the '/tmp/duke-active-ix-' fallback when none is set) with '2'
    # appended. The original author flagged this workaround with "AHEM".
    config.setPath(
        (config.getPath() or '/tmp/duke-active-ix-') + '2')  # AHEM...
    processor = Processor(config)
    database = processor.getDatabase()
    # The enclosing guard already guarantees linking mode, so the former
    # `if not linking:` arm could never run and has been removed; only
    # getDataSources(1) is indexed into this second database.
    # NOTE(review): 40000 is presumably the indexing batch size -- confirm
    # against Processor.index.
    processor.index(config.getDataSources(1), 40000)
Ejemplo n.º 4
0

# (0) decode command-line
# Exactly two positional arguments are expected; any other count makes the
# tuple unpacking below raise ValueError.
configfile, testfilename = sys.argv[1:]

# (1) load configuration
config = ConfigLoader.load(configfile)
# Linking mode is inferred from getDataSources() (no argument) returning an
# empty collection.
linking = config.getDataSources().isEmpty()
# Lower limit: 0.0 when linking, 0.4 otherwise.
lowlimit = 0.0 if linking else 0.4

# (2) index up all the data
processor = Processor(config)
database = processor.getDatabase()
# NOTE(review): 40000 is presumably the indexing batch size -- confirm
# against Processor.index.
if linking:
    # Only getDataSources(1) is indexed in linking mode (presumably the
    # first source group -- confirm against the configuration API).
    processor.index(config.getDataSources(1), 40000)
else:
    processor.index(config.getDataSources(), 40000)

# (3) actual genetic stuff
# Package prefix prepended to every short comparator class name below.
pkg = "no.priv.garshol.duke.comparators."
# Short class names, one per line; the list is replaced by the instantiated
# objects in the statement that follows.
comparators = [
    "DiceCoefficientComparator",
    "DifferentComparator",
    "ExactComparator",
    "JaroWinkler",
    "JaroWinklerTokenized",
    "Levenshtein",
    "NumericComparator",
    "PersonNameComparator",
    "SoundexComparator",
    "WeightedLevenshtein",
    "NorphoneComparator",
    "MetaphoneComparator",
    "QGramComparator",
    "GeopositionComparator",
]
# Turn each fully qualified class name into an instance, keeping list order.
comparators = [
    ObjectUtils.instantiate(pkg + c)
    for c in comparators
]