Example #1
0
def collecting_tokens(update, context):
    if DEBUG:
        print(update)
        print(2)

    if update.effective_chat.type == "group":
        username = update.effective_user.username
        text = update.message.text.split(' ')
        print(text)
        if username in DataPacket.members:
            if len(text) == 2:
                if text[1] == DataPacket.members[username]["token"]:
                    DataPacket.members[username]["token_received"] = True
                    IO.save_file_json(
                        Constants.DATA_PATH + Constants.DATA_MEMBERS_FILENAME,
                        DataPacket.members)
                    context.bot.send_message(
                        chat_id=update.effective_chat.id,
                        text=Messages.TOKEN_RECEIVED_FROM % username)

        tokens_not_received = check_tokens_received(DataPacket.members)
        if len(tokens_not_received) != 0:
            context.bot.send_message(chat_id=update.effective_chat.id,
                                     text=Messages.WAITING_FOR_TOKEN_FROM %
                                     str(tokens_not_received))
            return COLLECTING_TOKENS
        elif len(tokens_not_received) == 0:
            context.bot.send_message(chat_id=update.effective_chat.id,
                                     text=Messages.ALL_TOKENS_RECEIVED)
            return get_seed(update, context)

    else:
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=Messages.MUST_BE_IN_GROUP)
        return COLLECTING_TOKENS
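The COLLECTING_TOKENS return value and the (update, context) signature suggest this callback is one state of a python-telegram-bot ConversationHandler. A minimal registration sketch, assuming the v12/13-style API; the entry point, fallback and BOT_TOKEN names are placeholders, not part of the original code:

from telegram.ext import (Updater, ConversationHandler,
                          CommandHandler, MessageHandler, Filters)

conv = ConversationHandler(
    entry_points=[CommandHandler("start", start_process)],    # hypothetical entry point
    states={
        COLLECTING_TOKENS: [MessageHandler(Filters.text & ~Filters.command,
                                           collecting_tokens)],
    },
    fallbacks=[CommandHandler("cancel", cancel)],              # hypothetical fallback
)

updater = Updater(token=BOT_TOKEN, use_context=True)           # BOT_TOKEN is a placeholder
updater.dispatcher.add_handler(conv)
updater.start_polling()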
Example #2
0
def get_theme(update, context):
    if DEBUG:
        print(update)
    if DataPacket.is_seed_generated:
        if DataPacket.theme is None:
            DataPacket.theme = DataPacket.make_theme_object()

            mega_suggestion_list = []
            tokens = []
            for data in DataPacket.members.values():
                mega_suggestion_list += data["suggestions"]
                # token is a single string; append it rather than splicing its characters with +=
                tokens.append(data["token"])
            DataPacket.theme["theme"] = SeedGenerator.get_theme(
                mega_suggestion_list, DataPacket.seed)
            DataPacket.theme["is_theme_generated"] = 1

            IO.save_file_json(
                Constants.DATA_PATH + Constants.DATA_THEME_FILENAME,
                DataPacket.theme)

            context.bot.send_message(chat_id=update.effective_chat.id,
                                     text=Messages.GENERATED_THEME_IS %
                                     DataPacket.theme["theme"])

        else:
            context.bot.send_message(chat_id=update.effective_chat.id,
                                     text=Messages.THEME_ALREADY_SELECTED %
                                     "theme")

    else:
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=Messages.SEED_NOT_GENERATED)
Example #3
0
	def __init__(self,extractors,iob_directories=[],iob_file=None,label_index=-1):
		"""
		Args:
			extractors:
				the list of canonical citation extractors to evaluate
			iob_test_file: 
				the file in IOB format to be used for testing and evaluating the extactors
		"""
		# read the test instances from a list of directories containing the test data
		import logging
		self.logger = logging.getLogger("CREX.SIMPLEVAL")
		if(iob_file is None):
			self.logger.debug(iob_directories)
			data = []
			for directory in iob_directories:
				data += IO.read_iob_files(directory,".txt")
			self.test_instances = data
		else:
			self.test_instances = IO.file_to_instances(iob_file)
		self.logger.debug("Found %i instances for test"%len(self.test_instances))
		self.extractors = extractors
		self.output = {}
		self.error_matrix = None
		self.label_index = label_index
		return
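A minimal construction sketch for this evaluator, mirroring how Example #23 instantiates SimpleEvaluator; the extractor object and paths below are placeholders:

# extractors is a list of (name, extractor) pairs
evaluator = SimpleEvaluator([("crf", crf_extractor)],
                            iob_directories=["data/iob_test/"])
# or, as in Example #23, evaluate against a single IOB file
evaluator = SimpleEvaluator([("crf", crf_extractor)], iob_file="fold_1.test")
print("%i test instances" % len(evaluator.test_instances))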
Example #4
0
    def __call__(self, fidx):
        """
        Temporary worker to load data files
        This function does the actual processing of the correct target values
        used in the analysis below
        """
        # shorthands
        fileName = self.filenames[fidx]
        fileNames = self.filenames
        setting = self.setting

        # do control
        data = np.frombuffer(self.buff).reshape(self.buffshape)
        node, temp, trial = np.unravel_index(fidx,
                                             self.expectedShape,
                                             order='F')
        # control data
        if '{}' in fileName:
            # load control; bias correct mi
            control = IO.loadData(fileName)
            # panzeri-treves correction
            mi = control.mi
            bias = stats.panzeriTrevesCorrection(control.px,\
                                                 control.conditional, \
                                                 setting.repeat)
            mi -= bias

            data[0, trial, temp] = mi[:self.deltas, :].T
        # nudged data
        else:
            # extract the relevant part and drop the '.pickle' suffix
            # (str.strip would remove any of those characters, not the suffix)
            targetName = fileName.split('=')[-1].replace('.pickle', '')
            # extract pulse and only get left part
            targetName = re.sub('{|}', '', targetName).split(':')[0]
            # get the idx of the node
            nodeNames = []
            for name, idx in setting.mapping.items():
                if name in targetName:
                    nodeNames.append(idx)
                    # print(idx, name, targetName)

            # load the corresponding dataset to the control
            controlidx = fidx - node
            assert '{}' in fileNames[controlidx]
            # load matching control
            control = IO.loadData(fileNames[controlidx])
            # load nudge
            sample = IO.loadData(fileName)
            # impact = stats.KL(control.px, sample.px)
            impact = stats.KL(sample.px, control.px)
            # don't use +1 as the nudge has no effect at zero
            redIm = np.nansum(impact[-self.deltas:], axis=-1).T
            # TODO: check if this works with tuples (not sure)
            for name in nodeNames:
                data[(node - 1) // setting.nNodes + 1, trial, temp,
                     name, :] = redIm.squeeze().T
        self.pbar.update(1)
Example #5
0
def init():
    if not IO.file_exists(Constants.DATA_PATH):
        IO.make_dir(Constants.DATA_PATH)

    if IO.file_exists(Constants.DATA_PATH + Constants.DATA_MEMBERS_FILENAME +
                      ".json"):
        data = IO.read_file_json(Constants.DATA_PATH +
                                 Constants.DATA_MEMBERS_FILENAME)
        if data != 0:
            DataPacket.members = data
            if len(DataPacket.members) != 0:
                DataPacket.is_process_started = True
Example #6
0
def runJob(model, settings, simulationRoot):
    """
    Run the job and stops process if taking too long"
    """
    global rootDirectory

    # check if the file is already there else skip
    fn = createJob(model, settings, simulationRoot)
    if os.path.exists(fn):
        print(f'{fn} exists')
        return 0

    if settings.get('pulse'):
        trial = settings.get('trial')
        mag = settings.get('ratio')[0]

        control = f'data/trial={trial}_r={mag}_{{}}.pickle'
        control = os.path.join(simulationRoot, control)

        snapshots = {}
        # try to load the snapshots
        # redundant check if run on separate process
        while not snapshots:
            try:
                snapshots = IO.loadPickle(control).snapshots
            except Exception:  # snapshots not written yet; retry, but stay interruptible
                time.sleep(1)

    else:
        snaps = {}
        for k in 'nSamples burninSamples steps'.split():
            snaps[k] = settings.get(k)
        snapshots = infcy.getSnapShots(model, **snaps)

    conditional, px, mi = infcy.runMC(model, snapshots, deltas, repeats)
    store       = dict(\
        mi          = mi,\
        conditional = conditional,\
        px          = px,\
        snapshots   = snapshots)

    # empty these fields to save disk space
    if settings.get('pulse'):
        for i in 'mi conditional snapshots'.split():
            store[i] = []
    sr = IO.SimulationResult(**store)
    IO.savePickle(fn, sr, verbose=1)
    checkTime()
Example #7
0
def checkTime():
    """
    Save state on exit
    """
    import inspect

    global PID
    if time.time() > THRESHOLD:

        globs = {}
        for k, v in globals().copy().items():
            if not inspect.ismodule(v) and not isinstance(
                    v, argparse.ArgumentParser):
                globs[k] = v
        if PID is None:
            PID = 1234
        simFile = f'sim-{PID}'
        IO.savePickle(simFile, globs)
        sys.exit()
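The globals pickled here can be reloaded with the same pattern used elsewhere in this codebase (see the runFile handling in Example #16); a minimal restore sketch, with the sim file name taken from checkTime above:

saved = IO.loadPickle(f'sim-{PID}')   # PID as used when the state was saved
for k, v in saved.items():
    globals()[k] = v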
Example #8
0
def collecting_suggestions(update, context):
    if DEBUG:
        print(update)
    suggestions = update.message.text.split(',')
    for suggestion in suggestions:
        suggestion = suggestion.strip().lower()  # normalise case and drop surrounding whitespace
        if len(suggestion) == 0:
            continue
        if suggestion in DataPacket.members[
                update.effective_user.username]["suggestions"]:
            continue
        DataPacket.members[
            update.effective_user.username]["suggestions"].append(suggestion)
    IO.save_file_json(Constants.DATA_PATH + Constants.DATA_MEMBERS_FILENAME,
                      DataPacket.members)

    context.bot.send_message(chat_id=update.effective_chat.id,
                             text=Messages.SUGGESTIONS_ADDED)
    return finished_adding_suggestions(update, context)
Example #9
0
	def create_datasets(self):
		"""
		TODO
		"""
		
		from miguno.partitioner import *
		from miguno.crossvalidationdataconstructor import *
		from citation_extractor.Utils import IO
		positive_labels = ["B-REFSCOPE","I-REFSCOPE","B-AAUTHOR","I-AAUTHOR","B-REFAUWORK","I-REFAUWORK","B-AWORK","I-AWORK"]
		if(self.culling_size is not None):
			positives_negatives = [(n,IO.instance_contains_label(inst,positive_labels)) for n,inst in enumerate(self.culled_instances)]
			positives = [self.culled_instances[i[0]] for i in positives_negatives if i[1] is True]
			negatives = [self.culled_instances[i[0]] for i in positives_negatives if i[1] is False]
		else:
			positives_negatives = [(n,IO.instance_contains_label(inst,positive_labels)) for n,inst in enumerate(self.test_instances)]
			positives = [self.test_instances[i[0]] for i in positives_negatives if i[1] is True]
			negatives = [self.test_instances[i[0]] for i in positives_negatives if i[1] is False]
		self.logger.info("%i Positive instances"%len(positives))
		self.logger.info("%i Negative instances"%len(negatives))
		self.logger.info("%i Total instances"%(len(positives)+len(negatives)))
		self.dataSets_iterator = CrossValidationDataConstructor(positives, negatives, numPartitions=self.fold_number, randomize=False).getDataSets()
		pass
Example #10
0
def generate_token(update, context):
    if DEBUG:
        print(update)
        print("User %s requested token" % update.effective_user.username)
    if DataPacket.is_process_started:
        if update.effective_chat.type != "group":
            username = update.effective_user.username
            if username not in DataPacket.members:
                member = DataPacket.make_member_object(username)
                DataPacket.members.update(member)
                DataPacket.members[username]["token"] = str(uuid.uuid4())

                IO.save_file_json(
                    Constants.DATA_PATH + Constants.DATA_MEMBERS_FILENAME,
                    DataPacket.members)

                context.bot.send_message(chat_id=update.effective_chat.id,
                                         text=Messages.GENERATED_TOKEN_IS %
                                         DataPacket.members[username]["token"])
                print("\tToken generated: %s" %
                      DataPacket.members[username]["token"])
            else:
                context.bot.send_message(
                    chat_id=update.effective_chat.id,
                    text=Messages.TOKEN_ALREADY_GENERATED %
                    DataPacket.members[username]["token"])
                print("\tUser already has token: %s" %
                      DataPacket.members[username]["token"])

        else:
            context.bot.send_message(
                chat_id=update.effective_chat.id,
                text=Messages.CANNOT_REQUEST_TOKEN_IN_GROUP)
    else:
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=Messages.SELECTION_NOT_STARTED)
Example #11
0
loadGraph = ''
if __name__ == '__main__':
    graphs = []
    N = 10
    if not loadGraph:
        for i in range(10):
            r = np.random.rand() * (1 - .2) + .2
            # g = nx.barabasi_albert_graph(N, 2)
            g = nx.erdos_renyi_graph(N, r)
            # g = nx.duplication_divergence_graph(N, r)
            graphs.append(g)
    else:
        print('loading graph from pickle')
        graph = IO.loadPickle(loadGraph)
        graphs.append(graph)
    # w = nx.utils.powerlaw_sequence(N, 2)
    # g = nx.expected_degree_graph(w)
    # g = sorted(nx.connected_component_subgraphs(g), key = lambda x: len(x))[-1]

    #for i, j in g.edges():
    #    g[i][j]['weight'] = np.random.rand() * 2 - 1
#        graphs.append(g)

#    graphs[0].add_edge(0,0)
#    for j in np.int32(np.logspace(0, np.log10(N-1),  5)):
#       graphs.append(nx.barabasi_albert_graph(N, j))
    dataDir = 'Graphs'  # relative path careful
    df = IO.readCSV(f'{dataDir}/Graph_min1_1.csv', header=0, index_col=0)
    h = IO.readCSV(f'{dataDir}/External_min1_1.csv', header=0, index_col=0)
    networkSettings = dict( \
        path = args.graph, \
        size = N, \
        fixedNodes = args.nodes
    )

    # setup Ising model with N=networkSize spin flip attempts per simulation step
    modelSettings = dict( \
        temperature     = T, \
        updateType      = 'async' ,\
        magSide         = args.magSide if args.magSide in ['pos', 'neg'] else ''
    )
    model = fastIsing.Ising(graph, **modelSettings)

    try:
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']

    except:
        subprocess.call(['python3', 'run_mixing.py', f'{args.T}', f'{args.dir}', f'{args.graph}', \
                        '--maxcorrtime', '10000', \
                        '--maxmixing', '10000'])
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']


    systemSnapshotSettings = dict( \
Example #13
0
    # m.equilibriate(magRatios, eqSettings)
    combinations = itertools.product(\
        m.matched['ratios'].items(),\
        pulseSizes, \
        range(settings.get('nTrials'))
    )
    settings['graph'] = g
    # setup filepaths
    now = datetime.datetime.now().isoformat()
    simulationRoot = os.path.join(\
                    rootDirectory, now)
    print(f"Making {simulationRoot}")
    os.makedirs(simulationRoot, exist_ok=1)

    settings['equilibrium'] = m.matched
    IO.savePickle(os.path.join(simulationRoot, 'settings'),\
                settings)

    for (ratio, pulse, trial) in combinations:

        # tmp added properties
        settings['trial'] = trial
        settings['ratio'] = ratio

        if pulse:
            for node in m.graph.nodes():
                tmp = copy.deepcopy(m)
                tmp.t = ratio[1]
                intervention = {node: pulse}
                tmp.nudges = intervention

                settings['pulse'] = intervention
Example #14
0
                snapshots.append(s)

            MI, HX = computeMI_cond(model, node, minDist, maxDist,
                                    allNeighbours_G, snapshots, nTrials,
                                    nSamples, modelSettings)
            MIs_cond[T] = MI

            snapshotSettingsJoint = dict( \
                nSamples    = args.numSamplesJoint, \
                repeats     = args.repeats, \
                burninSteps = args.burninSteps, \
                distSamples = args.distSamples, \
                maxDist     = maxDist, \
                nBins       = args.bins
            )
            IO.saveSettings(targetDirectory, snapshotSettingsJoint,
                            'jointSnapshots')

            allNeighbours_G_allNodes = model.neighboursAtDistAllNodes(
                nodes, maxDist)

            avgSnapshots, avgSystemSnapshots, fullSnapshots = simulation.getJointSnapshotsPerDistNodes(model, nodes, \
                                                                                allNeighbours_G_allNodes, \
                                                                                **snapshotSettingsJoint, threads=nthreads, \
                                                                                initStateIdx=1, getFullSnapshots=1)

            MI, corr = infoTheory.pairwiseMI_allNodes(
                model,
                nodes,
                fullSnapshots.reshape(
                    (args.repeats * args.numSamplesJoint, -1)),
                distMax=maxDist)
from Utils import IO, stats, misc, plotting as plotz
from functools import partial

"""
- work from a folder directory
"""
# standard stuff
#root =  '/run/media/casper/test/1550482875.0001953/'
root = 'Data/cveltere/2019-05-09T16:10:34.645885'
root = '/run/media/casper/fc7e7a2a-73e9-41fe-9020-f721489b1900/cveltere'
root = 'Data/2019-05-13T13:34:02.290439'
root = 'Data/1548025318.5751357'
root = 'Data/new3'
#root = '/run/media/casper/4fdab2ee-95ad-4fc5-8027-8d079de9d4f8/Data/1548025318'

data     = IO.DataLoader(root) # extracts data folders
settings = {key : IO.Settings(root) for key in data} # load corresponding settings

centralities = {
                    r'$c_i^{deg}$' : partial(nx.degree, weight = 'weight'), \
                    r'$c_i^{betw}$': partial(nx.betweenness_centrality, weight = 'weight'),\
                    r'$c_i^{ic}$'  : partial(nx.information_centrality, weight = 'weight'),\
                    r'$c_i^{ev}$'  : partial(nx.eigenvector_centrality, weight = 'weight'),\
            }

#centralities = {key : partial(value, weight = 'weight') for key, value in nx.__dict__.items() if '_centrality' in key}

figDir = '../thesis/figures/'
information_impact = r'$\mu_i$'
causal_impact      = r'$\gamma_i$'
Example #16
0
    networkSettings = dict( \
        path = args.graph, \
        size = N
    )

    # setup Ising model with N=networkSize spin flip attempts per simulation step
    modelSettings = dict( \
        temperature     = T, \
        updateType      = 'async' ,\
        magSide         = args.magSide if args.magSide in ['pos', 'neg'] else ''
    )
    model = fastIsing.Ising(graph, **modelSettings)

    try:
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']

    except:
        subprocess.call(['python3', 'run_mixing.py', f'{T}', f'{args.dir}', f'{args.graph}', \
                        '--maxcorrtime', '10000', \
                        '--maxmixing', '10000'])
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']


    systemSnapshotSettings = dict( \
import networkx as nx, \
        itertools, scipy, \
        os, pickle, \
        sys, \
        multiprocessing as mp, json, \
        datetime, \
        time

parser = argparse.ArgumentParser()
parser.add_argument('--file')
if __name__ == "__main__":
    args = parser.parse_args()
    print(args, args.file)
    runFile = args.file
    for k, v in IO.loadPickle(runFile).items():
        globals()[k] = v

    modelSettings = dict(\
                             graph       = graph,\
                             temperature = 0,\
                             updateType  = updateType,\
                             magSide     = magSide,\
                             nudgeType   = nudgeType)
    model = FastIsing.Ising(**modelSettings)
    magRange = np.array([CHECK]).ravel()

    # magRange = array([.9, .2])
    fitTemps = np.linspace(0, graph.number_of_nodes() / 2, tempres)
    mag, sus = model.matchMagnetization(temps = fitTemps,\
     n = int(1e3), burninSamples = 0)
Example #18
0
    networkSettings = dict( \
        path = args.graph, \
        size = N, \
        nodes = nodes
    )

    # setup Ising model with N=networkSize spin flip attempts per simulation step
    modelSettings = dict( \
        temperature     = T, \
        updateType      = 'async' ,\
        magSide         = args.magSide if args.magSide in ['pos', 'neg'] else ''
    )
    model = fastIsing.Ising(graph, **modelSettings)

    try:
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']

    except:
        # try to load data containing mixing and correlation time. If it doesn't exist
        # yet, use 'run_mixing.py' script to generate it
        subprocess.call(['python3', 'run_mixing.py', f'{args.T}', f'{args.dir}', f'{args.graph}', \
                        '--maxcorrtime', '10000', \
                        '--maxmixing', '10000'])
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']
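The try/except above (repeated in Examples #11, #16, #24 and #28) is a load-or-generate pattern: reuse cached mixing/correlation-time results, or call run_mixing.py to produce them. A sketch of the same logic factored into a helper; the function name is an assumption:

def load_or_generate_mixing(targetDirectory, T, data_dir, graph_path):
    """Load mixing results, generating them via run_mixing.py if missing."""
    try:
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
    except Exception:
        subprocess.call(['python3', 'run_mixing.py', f'{T}', f'{data_dir}', f'{graph_path}',
                         '--maxcorrtime', '10000', '--maxmixing', '10000'])
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
    corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
    return mixingResults, corrTimeSettings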
Example #19
0
    temps = np.linspace(args.minT, args.maxT, args.numT)
    nSamples      = int(1e4)
    burninSteps   = int(1e4)
    magSide       = ''
    updateType    = 'async'


    targetDirectory = f'{os.getcwd()}/{args.dir}'
    os.makedirs(targetDirectory, exist_ok=True)

    settings = dict(
        nSamples         = nSamples, \
        burninSteps      = burninSteps, \
        updateMethod     = updateType
        )
    IO.saveSettings(targetDirectory, settings)

    for i, g in enumerate(ensemble):

        graph = nx.read_gpickle(g)
        filename = os.path.splitext(os.path.split(g)[-1])[0]  # strip() removes a char set, not the extension

        modelSettings = dict(\
                             graph       = graph,\
                             updateType  = updateType,\
                             magSide     = magSide
                             )
        model = fastIsing.Ising(**modelSettings)

        Tc = Tc_idx = -1
        while Tc < 0:
from Utils import IO
from Toolbox import infcy
# init arg parse
parser = argparse.ArgumentParser()
parser.add_argument('--file')
import time
if __name__ == '__main__':
    # load settings
    # parse input
    args = parser.parse_args()
    runFile = args.file

    # load data to global
    print("IM AM IN RUNFILE", runFile)
    settings = IO.loadPickle(runFile)
    model = settings.get('model')

    # init model
    # run experiment
    if not settings.get('pulse'):
        # run snapshots (cheap enough)
        snaps = {k : settings.get(k) for\
                k in 'nSamples burninSamples steps'.split()\
                }
        snapshots = infcy.getSnapShots(model, **snaps)
    # load nudges
    else:

        # think of something to extract the control
        trial = settings.get('trial')
Example #21
0
	def read_instances(directories):
		result = []
		for d in directories:
			result += IO.read_iob_files(d)
		return result
Example #22
0
genDataFile = lambda idx: f'dataset{idx}'

graphs = []
N = 10
# loadGraph = 'rerunthese.pickle2'
loadGraph = ''
if not loadGraph:
    for i in range(10):
        r = np.random.rand() * (1 - .2) + .2
        # g = nx.barabasi_albert_graph(N, 2)
        g = nx.erdos_renyi_graph(N, r)
        # g = nx.duplication_divergence_graph(N, r)
        graphs.append(g)
else:
    for graph in IO.loadPickle(loadGraph)['graphs']:
        graphs.append(graph)
# w = nx.utils.powerlaw_sequence(N, 2)
# g = nx.expected_degree_graph(w)
# g = sorted(nx.connected_component_subgraphs(g), key = lambda x: len(x))[-1]

#for i, j in g.edges():
#    g[i][j]['weight'] = np.random.rand() * 2 - 1
#        graphs.append(g)

#    graphs[0].add_edge(0,0)
#    for j in np.int32(np.logspace(0, np.log10(N-1),  5)):
#       graphs.append(nx.barabasi_albert_graph(N, j))
# dataDir = 'Graphs' # relative path careful
# df    = IO.readCSV(f'{dataDir}/Graph_min1_1.csv', header = 0, index_col = 0)
# h     = IO.readCSV(f'{dataDir}/External_min1_1.csv', header = 0, index_col = 0)
Example #23
0
	def run(self):
		"""
		TODO		
		"""
		iterations = []
		results = {}
		results_by_entity = {}
		# first let's create a test and a train set for each iteration
		for x,iter in enumerate(self.dataSets_iterator):
			self.logger.info("Iteration %i"%(x+1))
			train_set=[]
			test_set=[]
			for y,set in enumerate(iter):
				for n,group in enumerate(set):
					if(y==0):
						train_set+=group
					else:
						test_set+=group
			iterations.append((train_set,test_set))
		
		# let's go through all the iterations
		for i,iter in enumerate(iterations):
			results["iter-%i"%(i+1)] = {}
			results_by_entity["iter-%i"%(i+1)] = {}
			train_file="%sfold_%i.train"%(self.evaluation_dir,i+1)
			test_file="%sfold_%i.test"%(self.evaluation_dir,i+1)
			IO.write_iob_file(iter[0],train_file)
			IO.write_iob_file(iter[1],test_file)
			# the following line is a bit of a workaround
			# to avoid recomputing the features when training
			# each new classifier, I take them from the file created
			# to train the CRF model (which should always be the first extractor
			# to be evaluated).
			filename = "%sfold_%i.train.train"%(self.extractors[0][1].TEMP_DIR,(i+1))
			f=codecs.open(filename,'r','utf-8')
			data = f.read()
			f.close()
			# split each token line into (feature columns, label column)
			feature_sets = [[[token.split('\t')[:-1], token.split('\t')[-1:]]
				for token in instance.split('\n')]
				for instance in data.split('\n\n')]
			order = FeatureExtractor().get_feature_order()
			labelled_feature_sets=[]
			for instance in feature_sets:
				for token in instance:
					temp = [{order[n]:feature for n,feature in enumerate(token[0])},token[1][0]]
					labelled_feature_sets.append(temp)
			self.logger.info("read %i labelled instances"%len(feature_sets))
			for n,extractor in enumerate(self.extractors):
					extractor_settings = extractor[1]
					extractor_name = extractor[0]
					results["iter-%i"%(i+1)][extractor_name] = {}
					self.logger.info("Running iteration #%i with extractor %s"%(i+1,extractor_name))
					self.logger.info(train_file)
					self.logger.info(test_file)
					self.logger.info(extractor_settings)
					extractor_settings.DATA_FILE = train_file
					if(extractor_settings.CLASSIFIER is not None):
						extractor = citation_extractor(extractor_settings, extractor_settings.CLASSIFIER,labelled_feature_sets)
					else:
						extractor = citation_extractor(extractor_settings)
					self.logger.info(extractor.classifier)
					se = SimpleEvaluator([(extractor_name, extractor),],iob_file=test_file)
					results["iter-%i"%(i+1)][extractor_name] = se.eval()[extractor_name][0]
					results_by_entity["iter-%i"%(i+1)][extractor_name] = SimpleEvaluator.calc_stats_by_entity(se.eval()[extractor_name][1])
					#self.logger.info(results_by_entity["iter-%i"%(i+1)][extractor_name])
		return results,results_by_entity	
Example #24
0
import matplotlib.pyplot as plt, numpy as np, scipy, multiprocessing as mp, os, \
re, networkx as nx

from tqdm import tqdm
from Utils import IO, stats, misc, plotting as plotz
from functools import partial
"""
- work from a folder directory
"""
# standard stuff
#root =  '/run/media/casper/test/1550482875.0001953/'
root = '/home/casper/projects/information_impact/Data/1548025318.5751357'

data = IO.DataLoader(root)  # extracts data folders
settings = {key: IO.Settings(root)
            for key in data}  # load corresponding settings

centralities = {
                    r'$c_i^{deg}$' : partial(nx.degree, weight = 'weight'), \
                    r'$c_i^{betw}$': partial(nx.betweenness_centrality, weight = 'weight'),\
                    r'$c_i^{ic}$'  : partial(nx.information_centrality, weight = 'weight'),\
                    r'$c_i^{ev}$'  : partial(nx.eigenvector_centrality, weight = 'weight'),\
            }
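A small usage sketch for the centrality table above; the graph g below is a stand-in (the partials only assume a networkx graph with a 'weight' edge attribute), and each call returns a node-to-score mapping:

import networkx as nx

g = nx.karate_club_graph()                 # stand-in graph
nx.set_edge_attributes(g, 1.0, 'weight')   # the partials expect weighted edges
rankings = {name: dict(fn(g)) for name, fn in centralities.items()}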

figDir = '../thesis/figures/'
information_impact = r'$\mu_i$'
causal_impact = r'$\gamma_i$'


# %%
# begin the ugliness
    args = parser.parse_args()

    T = args.T  #float(sys.argv[1])
    targetDirectory = args.dir  #sys.argv[2]
    os.makedirs(targetDirectory, exist_ok=True)

    # load network
    graph = nx.read_gpickle(args.graph)
    N = len(graph)

    networkSettings = dict( \
        path = args.graph, \
        nNodes = N
    )
    IO.saveSettings(targetDirectory, networkSettings, 'network')

    # setup Ising model with nNodes spin flip attempts per simulation step
    modelSettings = dict( \
        temperature     = T, \
        updateType      = 'async' ,\
        magSide         = ''
    )
    #IO.saveSettings(targetDirectory, modelSettings, 'model')
    model = fastIsing.Ising(graph, **modelSettings)

    #print(model.mapping)
    #print(list(graph))

    # determine mixing/correlation time
    mixingTimeSettings = dict( \
        size = N, \
        nodes = nodes
    )

    # setup Ising model with N=networkSize spin flip attempts per simulation step
    modelSettings = dict( \
        temperature     = T, \
        updateType      = 'async' ,\
        magSide         = args.magSide if args.magSide in ['pos', 'neg'] else ''
    )
    model = fastIsing.Ising(graph, **modelSettings)

    # try to load data containing mixing and correlation time. If it doesn't exist
    # yet, use 'run_mixing.py' script to generate it
    try:
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']
        print(f'mixing time      = {burninSteps}')
        print(f'correlation time = {distSamples}')
    except:
        subprocess.call(['python3', 'run_mixing.py', f'{args.T}', f'{args.dir}', f'{args.graph}', \
                        '--maxcorrtime', '10000', \
                        '--maxmixing', '10000'])
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']

    # try to load neighbourhood shell data. If it doesn't exist yet, generate it
    head, tail = os.path.split(f)
    print(tail)

    data = IO.loadPickle(head, tail)

    gname = os.path.splitext(tail)[0].split('_Tc_results')[0]

    result = IO.TempsResult(data['temps'], data['mags'], data['abs_mags'], \
        data['sus'], data['binder'], data['T_c'], data['T_d'], data['T_o'], gname)
    dir = f'backup/tempsData/{head}'
    os.makedirs(dir, exist_ok=True)
    result.saveToPickle(dir)
"""

directory = 'output_systemEntropyGreedy'
for f in find_files(f'../masterthesis_casperscode/{directory}',
                    'simulation_results', 'dict.pickle'):
    head, tail = os.path.split(f)
    print(tail)

    data = IO.loadPickle(head, tail)
    #type = data['type']
    #data.pop('type')

    result = IO.SimulationResult(**data)
    t = tail.split('_')[-2]
    print(t)
    dir = f'backup/{directory}/{head}'
    os.makedirs(dir, exist_ok=True)
    result.saveToPickle(dir, timestamp=t)
Example #28
0
    sr = IO.SimulationResult(**store)
    IO.savePickle(fn, sr, verbose=1)
    checkTime()

# init models
if __name__ == "__main__":
    M = settings.get('model')
    args = parser.parse_args()
    file, PID = args.file, args.id
    print(file, PID)
    # this should only be run once per call
    if not file:
        g = nx.erdos_renyi_graph(3, np.random.uniform(0, 1))
        PSYCHO = True
        if PSYCHO:
            df = IO.readCSV('Graphs/Graph_min1_1.csv', header = 0,\
                    index_col = 0)
            h = IO.readCSV('Graphs/External_min1_1.csv', header = 0,\
                    index_col = 0)
            g = nx.from_pandas_adjacency(df)
            attr = {
                node: dict(H=row['externalField'])
                for node, row in h.iterrows()
            }
            nx.set_node_attributes(g, attr)
            modelSettings['magSide'] = ''
            settings['modelSettings'] = modelSettings

        m = M(graph = g, \
            **settings.get('modelSettings'), \
            equilibrium = equilibrium)
        matched = m.matched
    networkSettings = dict( \
        path = args.graph, \
        size = N, \
        node = node
    )

    # setup Ising model with N=networkSize spin flip attempts per simulation step
    modelSettings = dict( \
        temperature     = T, \
        updateType      = 'async' ,\
        magSide         = args.magSide if args.magSide in ['pos', 'neg'] else ''
    )
    model = fastIsing.Ising(graph, **modelSettings)

    try:
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']

    except:
        subprocess.call(['python3', 'run_mixing.py', f'{T}', f'{args.dir}', f'{args.graph}', \
                        '--maxcorrtime', '10000', \
                        '--maxmixing', '10000'])
        mixingResults = IO.loadResults(targetDirectory, 'mixingResults')
        corrTimeSettings = IO.loadResults(targetDirectory, 'corrTimeSettings')
        burninSteps = mixingResults['burninSteps']
        distSamples = mixingResults['distSamples']
    allNeighbours_G, allNeighbours_idx = model.neighboursAtDist(node, maxDist)

import time
from Utils.stats import KL, JS
deltas = 200
repeats = int(1e4)
start = time.time()

N = 5  # repeats?
#nudges[-1] = np.inf
#graph = nx.barabasi_albert_graph(10, 2)
graph = nx.erdos_renyi_graph(5, .2)
#graph = nx.read_weighted_edgelist('Graphs/aves-barn-swallow-non-physical.edges')
# %%
#graph = nx.krackhardt_kite_graph()

dataDir = 'Graphs'  # relative path careful
df = IO.readCSV(f'{dataDir}/Graph_min1_1.csv', header=0, index_col=0)
h = IO.readCSV(f'{dataDir}/External_min1_1.csv', header=0, index_col=0)
#graph   = nx.from_pandas_adjacency(df)
#graph = nx.erdos_renyi_graph(10, .3)
#graph = nx.path_graph(5)
#graph = nx.Graph()
#graph.add_edge(0,1)
#graph.add_edge(1,2)
#graph.add_edge(2,0)
#graph[0][1]['weight'] = 10
#attr = {}
#graph = nx.krackhardt_kite_graph()
graph = nx.erdos_renyi_graph(5, .3)

#for node, row in h.iterrows():
#    attr[node] = dict(H = row['externalField'], nudges = 0)