Example #1
 def __init__(self, file):
     fileRead = open(file, 'r')
     self.delta = dict()
     definitions = fileRead.read().replace(' ',
                                           '').replace('\n', '').replace(
                                               '\t', '').split('.')
     del definitions[-1]
     for definition in definitions:
         if (definition.split(':')[0][0] == 'T'):
             transitionsString = definition.replace(
                 ':=',
                 '=').split('T:')[1].replace('},',
                                             '*').replace('}',
                                                          '*').split('*')
             for transition in transitionsString:
                 if (transition != ''):
                     transition = transition.split('={')
                     state = transition[0][0]
                     simbol = transition[0][2]
                     targets = set(transition[1].split(','))
                     deltaAux = dict([(simbol, targets)])
                     if self.delta.has_key(state):
                         self.delta[state][simbol] = targets
                     else:
                         self.delta[state] = deltaAux
         elif (definition.split(':')[0][0] == 'I'):
             self.q0 = definition.split(':')[1].split(',')[0]
         elif (definition.split(':')[0][0] == 'F'):
             self.F = definition.split(':')[1].split(',')
     fileRead.close()
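
The parsing above implies a compact text format: definitions are separated by '.', transitions are written T:q,a={q,r},..., and I:/F: introduce the initial state and the final-state list. A minimal usage sketch, assuming the enclosing class is called NFA (the class name is not shown) and that state and symbol names are single characters:

    sample = "T:q,a={q,r},r,b={q}.I:q.F:r."
    with open('nfa.txt', 'w') as handle:
        handle.write(sample)
    nfa = NFA('nfa.txt')   # hypothetical name for the class defining __init__ above
    print nfa.delta        # e.g. {'q': {'a': set(['q', 'r'])}, 'r': {'b': set(['q'])}}
    print nfa.q0, nfa.F    # q ['r']
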
Example #2
 def post(self, bool, bytes, dict, float, int, list, null, unicode):
     return __builtin__.dict(
         zip(
             ("bool", "bytes", "dict", "float", "int", "list", "null", "unicode"),
             (bool, bytes, dict, float, int, list, null, unicode),
         )
     )
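
Every parameter of this method shadows a builtin name, which is why the body has to reach for __builtin__.dict instead of the bare name. The pattern in isolation, as a sketch:

    import __builtin__

    def demo(dict):                     # the parameter hides the builtin dict
        return __builtin__.dict(value=dict)

    print demo('shadowed')              # {'value': 'shadowed'}
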
Example #3
    def __init__(self,config,configAbsPath):

        # A dictionary containing the ioHub configuration file settings
        self.ioHubConfig=config

        # the path to the ioHub configuration file itself.
        self.ioHubConfigAbsPath=configAbsPath

        # udp port setup
        self.udp_client = UDPClientConnection(coder=self.ioHubConfig['ipcCoder'])

        # the dynamically generated object that contains an attribute for each device registered for monitoring
        # with the ioHub server, so that devices can be accessed experiment-process side by device name.
        self.devices=ioHubDevices(self)

        # a dictionary that holds the same devices represented in .devices, but stored in a dictionary using the device
        # name as the dictionary key
        self.deviceByLabel=dict()

        # attribute to hold the current experiment ID that has been created by the ioHub ioDataStore if saving data to the
        # ioHub hdf5 file type.
        self.experimentID=None

        # attribute to hold the current experiment session ID that has been created by the ioHub ioDataStore if saving data to the
        # ioHub hdf5 file type.
        self.experimentSessionID=None
Example #4
    def __init__(self, config, configAbsPath):

        # A dictionary containing the ioHub configuration file settings
        self.ioHubConfig = config

        # the path to the ioHub configuration file itself.
        self.ioHubConfigAbsPath = configAbsPath

        # udp port setup
        self.udp_client = UDPClientConnection(
            coder=self.ioHubConfig['ipcCoder'])

        # the dynamically generated object that contains an attribute for each device registered for monitoring
        # with the ioHub server, so that devices can be accessed experiment-process side by device name.
        self.devices = ioHubDevices(self)

        # a dictionary that holds the same devices represented in .devices, but stored in a dictionary using the device
        # name as the dictionary key
        self.deviceByLabel = dict()

        # attribute to hold the current experiment ID that has been created by the ioHub ioDataStore if saving data to the
        # ioHub hdf5 file type.
        self.experimentID = None

        # attribute to hold the current experiment session ID that has been created by the ioHub ioDataStore if saving data to the
        # ioHub hdf5 file type.
        self.experimentSessionID = None
Example #5
def get_project_info(manager, project):
    project_id = project['value']['_id']
    questionnaire = Project.get(manager, project_id)
    questionnaire_code = questionnaire.form_code

    analysis, disabled, log = get_project_analysis_and_log_link(project_id, questionnaire_code)

    web_submission_link = reverse("web_questionnaire", args=[project_id])

    web_submission_link_disabled = 'disable_link'
    if 'web' in project['value']['devices']:
        web_submission_link_disabled = ""

    create_subjects_links = {}
    for entity_type in questionnaire.entity_type:
        create_subjects_links.update({
            entity_type: append_query_strings_to_url(
                reverse("create_subject", args=[entity_type]), web_view=True)})

    project_info = dict(project_id=project_id,
                        name=project['value']['name'],
                        qid=questionnaire_code,
                        created=project['value']['created'],
                        link=(reverse('project-overview', args=[project_id])),
                        log=log, analysis=analysis, disabled=disabled,
                        web_submission_link=web_submission_link,
                        web_submission_link_disabled=web_submission_link_disabled,
                        create_subjects_link=create_subjects_links,
                        entity_type=questionnaire.entity_type,
                        encoded_name=urlquote(project['value']['name']),
                        import_template_file_name=slugify(project['value']['name']))
    return project_info
Example #6
def dict(*a, **k):
    import warnings
    import __builtin__
    warnings.warn(
        'twisted.python.util.dict is deprecated.  Use __builtin__.dict instead'
    )
    return __builtin__.dict(*a, **k)
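
A usage sketch of the deprecated wrapper: it forwards to the builtin while emitting a warning on every call (warnings.warn without an explicit category defaults to UserWarning):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        print dict(a=1), dict([('b', 2)])   # {'a': 1} {'b': 2}
    print len(caught)                       # 2 -- one warning per call
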
Example #7
 def as_dict(self):
     photo = __builtin__.dict(Id=self.Id,
                              HouseId=self.HouseId,
                              RelativePath=self.RelativePath,
                              CreatedAt=self.CreatedAt,
                              UpdatedAt=self.UpdatedAt)
     return photo
Example #8
def get_project_info(manager, raw_project):
    project_id = raw_project['value']['_id']
    project = Project.load(manager.database, project_id)
    questionnaire = manager.get(project.qid, FormModel)
    questionnaire_code = questionnaire.form_code

    analysis, disabled, log = get_project_analysis_and_log_link(project, project_id, questionnaire_code)

    web_submission_link = reverse("web_questionnaire", args=[project_id])

    web_submission_link_disabled = 'disable_link'
    if 'web' in raw_project['value']['devices']:
        web_submission_link_disabled = ""

    create_subjects_link = ''
    if 'no' in raw_project['value']['activity_report']:
        create_subjects_link = reverse(create_subject, args=[project.entity_type])

    project_info = dict(name=raw_project['value']['name'],
        qid=questionnaire_code,
        created=raw_project['value']['created'],
        type=raw_project['value']['project_type'],
        link=(reverse(project_overview, args=[project_id])),
        log=log, analysis=analysis, disabled=disabled,
        web_submission_link=web_submission_link,
        web_submission_link_disabled=web_submission_link_disabled,
        create_subjects_link=create_subjects_link,
        entity_type=project.entity_type)
    return project_info
Example #9
 def as_dict_JSON(self):
     landlord = __builtin__.dict(Id=self.Id,
                                 FirstName=self.FirstName,
                                 LastName=self.LastName,
                                 Email=self.Email,
                                 Phone=self.Phone)
     return landlord
Example #10
 def as_dict_JSON(self):
     student = __builtin__.dict(Id=self.Id,
                                FirstName=self.FirstName,
                                LastName=self.LastName,
                                Email=self.Email,
                                Phone=self.Phone)
     return student
Example #11
def create_global_topic_list(articleList):
    e = re.compile(r"\s(de)\s")
    u = re.compile(r"\s(du)\s")
    globalTopicList = []
    
    i = 0
    for commList in articleList.values():
        # Article body + all comments 
        art = commList[0].artBody        
        for comm in commList:
            art += comm.body
            
        # Global list of named entities
        art = u.sub(" Du ", art)            
        art = e.sub(" De ", art)
        entities = extract_entities(wordpunct_tokenize(art))
        globalTopicList += entities 
        i += 1
        if i % 100 == 0:
            print i,"comments processed for global vector" 

    globalTopicList = nltk.FreqDist(globalTopicList)

    tempVector = dict()
    for k in globalTopicList.items()[:100]:
        tempVector[k[0]] = 0
    
    f = open("globalTopics" + '.pkl', 'wb')
    pickle.dump(tempVector, f, pickle.HIGHEST_PROTOCOL)
    f.close()
Example #12
def read_user_data(filename):
    f = open(filename, 'r')        
        
    # To process all the comments
    userList = dict()
    commentCount = 0
    for line in f:        
        temp = line.split('&')
        if len(temp) < 9:
            continue
        
        userid = temp[0]
        inDeg = temp[1]
        outDeg = temp[2]
        age = temp[3]
        postCount = temp[4]
        postRate = temp[5]
        pageRank = temp[6]
        hub = temp[7]
        auth = temp[8]

        comm = [inDeg, outDeg, age, postCount, postRate, pageRank, hub, auth]
        
        userList[userid] = comm
        
        commentCount += 1
        if commentCount % 10000 == 0:
            print "Read", commentCount, "user comments"
        

    print "done reading"
            
        
    return userList, len(userList)
Example #13
def get_project_info(manager, raw_project):
    project_id = raw_project['value']['_id']
    project = Project.load(manager.database, project_id)
    questionnaire = manager.get(project.qid, FormModel)
    questionnaire_code = questionnaire.form_code

    analysis, disabled, log = get_project_analysis_and_log_link(project, project_id, questionnaire_code)

    web_submission_link = reverse("web_questionnaire", args=[project_id])

    web_submission_link_disabled = 'disable_link'
    if 'web' in raw_project['value']['devices']:
        web_submission_link_disabled = ""

    create_subjects_link = ''
    if 'no' in raw_project['value']['activity_report']:
        create_subjects_link = append_query_strings_to_url(reverse("create_subject", args=[project.entity_type]),
                                                           web_view=True)

    project_info = dict(project_id=project_id,
                        name=raw_project['value']['name'],
                        qid=questionnaire_code,
                        created=raw_project['value']['created'],
                        type=raw_project['value']['project_type'],
                        link=(reverse('project-overview', args=[project_id])),
                        log=log, analysis=analysis, disabled=disabled,
                        web_submission_link=web_submission_link,
                        web_submission_link_disabled=web_submission_link_disabled,
                        create_subjects_link=create_subjects_link,
                        entity_type=project.entity_type,
                        encoded_name=urlquote(raw_project['value']['name']),
                        import_template_file_name=slugify(raw_project['value']['name']))
    return project_info
Example #14
def _construct_project_dict(user, local_time_delta, project):
    project_id = project['project_id']
    delete_links = reverse('delete_project', args=[project_id])
    disable_link_class, hide_link_class = _get_visibility_settings_for(
        user, project)

    return dict(
        delete_links=delete_links,
        name=project['name'],
        created=convert_utc_to_localized(local_time_delta, project['created']),
        qid=project['qid'],
        link=project['link'],
        web_submission_link_disabled=project['web_submission_link_disabled'],
        web_submission_link=project['web_submission_link'],
        analysis=project['analysis'],
        disabled=project['disabled'],
        log=project['log'],
        create_subjects_link=project['create_subjects_link'],
        entity_type=project['entity_type'],
        encoded_name=project['encoded_name'],
        import_template_file_name=project['import_template_file_name'],
        is_advanced_questionnaire=bool(project['is_advanced_questionnaire']),
        is_poll=project['is_poll'],
        disable_link_class=disable_link_class,
        hide_link_class=hide_link_class)
Example #15
    def displayExperimentSessionSettingsDialog(self):
        """
        Display an editable dialog showing the experiment session settings retrieved from the configuration file.
        This includes the few mandatory ioHub experiment session attributes, as well as any user-defined experiment session
        attributes that have been defined in the experiment configuration file. If OK is selected in the dialog,
        the experiment logic continues, otherwise the experiment session is terminated.
        """
        allSessionDialogVariables = dict(self.experimentSessionDefaults,
                                         **self.sessionUserVariables)
        sessionVariableOrder = self.configuration['session_variable_order']
        if 'user_variables' in allSessionDialogVariables:
            del allSessionDialogVariables['user_variables']

        sessionDlg = psychopy.gui.DlgFromDict(allSessionDialogVariables,
                                              'Experiment Session Settings',
                                              [], sessionVariableOrder)

        if sessionDlg.OK:
            for key, value in allSessionDialogVariables.iteritems():
                if key in self.experimentSessionDefaults:
                    self.experimentSessionDefaults[key] = str(value)
                elif key in self.sessionUserVariables:
                    self.sessionUserVariables[key] = str(value)
            return False
        return True
Example #16
 def _eventListToDict(eventValueList):
     """
     Convert an ioHub event that is currently represented as an ordered list of values, and return the event as a
     dictionary mapping attribute names to attribute values for the object.
     """
     eclass = EventConstants.EVENT_CLASSES[eventValueList[3]]
     combo = zip(eclass.attributeNames, eventValueList)
     return dict(combo)
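
The conversion itself is just zip plus dict: pair each attribute name with the value at the same position. The pattern in isolation (the attribute names below are invented for illustration):

    attributeNames = ('experiment_id', 'session_id', 'device_id', 'event_type')
    eventValueList = [1, 52, 7, 33]
    event = dict(zip(attributeNames, eventValueList))
    print event['device_id'], event['event_type']   # 7 33
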
Example #17
 def as_dict(self):
     dev = __builtin__.dict(Id=self.Id,
                            ProjectName=self.ProjectName,
                            Email=self.Email,
                            Key=self.Key,
                            CreatedAt=self.CreatedAt,
                            UpdatedAt=self.UpdatedAt)
     return dev
Example #18
 def _eventListToDict(eventValueList):
     """
     Convert an ioHub event that is currently represented as an ordered list of values, and return the event as a
     dictionary mapping attribute names to attribute values for the object.
     """
     eclass=EventConstants.EVENT_CLASSES[eventValueList[3]]
     combo = zip(eclass.attributeNames,eventValueList)
     return dict(combo)
Example #19
 def as_dict_JSON(self):
     review = __builtin__.dict(Id=self.Id,
                               HouseId=self.HouseId,
                               StudentId=self.StudentId,
                               Stars=self.Stars,
                               Comment=self.Comment,
                               CreatedAt=str(self.CreatedAt),
                               UpdatedAt=str(self.UpdatedAt))
     return review
Example #20
 def _eventListToObject(eventValueList):
     """
     Convert an ioHub event that is currently represented as an ordered list of values, and return the correct
     ioHub.devices.DeviceEvent subclass for the given event type.
     """
     eclass = EventConstants.EVENT_CLASSES[eventValueList[3]]
     combo = zip(eclass.attributeNames, eventValueList)
     kwargs = dict(combo)
     return eclass(**kwargs)
Example #21
 def _eventListToObject(eventValueList):
     """
     Convert an ioHub event that is currently represented as an ordered list of values, and return the correct
     ioHub.devices.DeviceEvent subclass for the given event type.
     """
     eclass=EventConstants.EVENT_CLASSES[eventValueList[3]]
     combo = zip(eclass.attributeNames,eventValueList)
     kwargs = dict(combo)
     return eclass(**kwargs)
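
Compared with _eventListToDict, this variant feeds the same zip/dict result into the event class constructor as keyword arguments. A self-contained sketch with a stand-in event class:

    class Event(object):                    # stand-in for a DeviceEvent subclass
        def __init__(self, time, code):
            self.time, self.code = time, code

    event = Event(**dict(zip(('time', 'code'), (0.25, 7))))
    print event.time, event.code            # 0.25 7
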
Example #22
def extract_bigrams(articleList, commentCount):
    featureMatrix = np.zeros([commentCount,100])

    index = 0
    stemmer = SnowballStemmer("english", ignore_stopwords=True)
    bagOfWords = []
    for art in articleList.items():        
        for comm in art[1]:
            mywords = words(comm.body)
            mywords = known_words(mywords)
            # Remove Stops
            filtered_words = [w for w in mywords if not w in stopwords.words('english')]
            # Stemming
            stemmed_words = [stemmer.stem(w) for w in filtered_words]
            bagOfWords += stemmed_words
            bagOfWords.append("\n")
            
    tempVector = dict()
        
    #Create your bigrams
    bgs = nltk.bigrams(bagOfWords)

    fdist = nltk.FreqDist(bgs)   
    
    for k in fdist.keys()[:100]:
        tempVector[k] = 0

    theKeys = tempVector.keys()
    
    for art in articleList.items():        
        for comm in art[1]:
            mywords = words(comm.body)
            mywords = known_words(mywords)
            # Remove Stops
            filtered_words = [w for w in mywords if not w in stopwords.words('english')]
            # Stemming
            stemmed_words = [stemmer.stem(w) for w in filtered_words]
            bgs = nltk.bigrams(stemmed_words)
            for word in (w for w in bgs if tempVector.has_key(w)):
                keyInd = theKeys.index(word)      
                featureMatrix[index][keyInd] += 1
                           
            index += 1
            if index % 100 == 0:
                print "extracted", index, "features"
        
            if index >= commentCount:
                break

    print "non-zero", np.count_nonzero(featureMatrix)
    print "Percentage filled:%.2f" % (float(np.count_nonzero(featureMatrix)) / (featureMatrix.shape[0] * featureMatrix.shape[1]))
    return featureMatrix
Example #23
 def as_dict(self):
     student = __builtin__.dict(Id=self.Id,
                                FirstName=self.FirstName,
                                LastName=self.LastName,
                                Email=self.Email,
                                Phone=self.Phone,
                                IsActive=self.IsActive,
                                CreatedAt=self.CreatedAt,
                                UpdatedAt=self.UpdatedAt)
     return student
Example #24
def extract_social_features(df_comments):
    socialVector = np.empty([df_comments.shape[0],8])
    index = 0
        
    graph = networkx.DiGraph()   
    
    userdict = dict()
    for _, row in df_comments.iterrows():
        userdict[row['comment_id']] = row['author']
        
    for user in set(userdict.values()):
        graph.add_node(user)
        
         
    for _, row in df_comments.iterrows():
        if not userdict.has_key(row['thread_root_id']):
            continue
        
        source = userdict[row['comment_id']]
        dest = userdict[row['thread_root_id']]
        if source == dest:
            continue
        graph.add_edge(source, dest)
    
    pageranker = networkx.pagerank(graph, alpha=0.85)
    hubs, auths = networkx.hits(graph)
    
    author_groupby = df_comments.groupby('author')
    user_age_dict = {}
    user_nr_posts_dict = {}
    for _,group in author_groupby:
        first_date = datetime.fromtimestamp(mktime(group.date.values[0]))
        last_date = datetime.fromtimestamp(mktime(group.date.values[-1]))
        diff = last_date - first_date
        days = diff.days
        user_age_dict[group.author.values[0]] = days + 1
        user_nr_posts_dict[group.author.values[0]] = len(group)
        
    for ix, row in df_comments.iterrows():            
        user = userdict[row['comment_id']]
        socialVector[ix][0] = graph.in_degree(user)  # In Degree
        socialVector[ix][1] = graph.out_degree(user)  # Out Degree
        socialVector[ix][2] = user_age_dict[user]  # User Age
        socialVector[ix][3] = user_nr_posts_dict[user]  # Nr of Posts
        socialVector[ix][4] = user_nr_posts_dict[user]/float(user_age_dict[user])  # Post rate
        socialVector[ix][5] = pageranker[user]  # Pagerank
        socialVector[ix][6] = hubs[user]  # Hub score
        socialVector[ix][7] = auths[user]  # Authority score
    
        index += 1
        if index % 1000 == 0:
            print "extracted", index, "values"
        
                
    return socialVector
Example #25
 def __init__(self, header = None, li = (), idName = None, RowClass = util.Row, dict = None):
     if dict is not None:
         self.items = dict
     elif header is not None and li is not None:
         idfield = header.index(idName)
         self.items = __builtin__.dict([ (i[idfield], i) for i in li ])
     else:
         self.items = {}
     self.header = header
     self.RowClass = RowClass
     self.idName = idName
Example #26
 def __init__(self, header=None, li=(), idName=None, RowClass=util.Row, dict=None):
     if dict is not None:
         self.items = dict
     elif header is not None and li is not None:
         idfield = header.index(idName)
         self.items = __builtin__.dict([(i[idfield], i) for i in li])
     else:
         self.items = {}
     self.header = header
     self.RowClass = RowClass
     self.idName = idName
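
A hedged usage sketch of this keyed-table constructor (util.Row and the enclosing class are not shown; Table is assumed here as the class name): each row of li is indexed by the column named idName.

    header = ['id', 'name']
    rows = [(1, 'ada'), (2, 'grace')]
    table = Table(header=header, li=rows, idName='id')
    print table.items                       # {1: (1, 'ada'), 2: (2, 'grace')}
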
Example #27
def index(request):
    disable_link_class, hide_link_class, page_heading = projects_index(request)
    rows = get_project_list(request)
    project_list = []
    smart_phone_instruction_link = reverse("smart_phone_instruction")
    for project in rows:
        project_id = project['project_id']
        delete_links = reverse('delete_projects', args=[project_id])
        project = dict(delete_links=delete_links,
                       name=project['name'],
                       created=project['created'],
                       qid=project['qid'],
                       link=project['link'],
                       web_submission_link_disabled=project['web_submission_link_disabled'],
                       web_submission_link=project['web_submission_link'],
                       analysis=project['analysis'],
                       disabled=project['disabled'],
                       log=project['log'],
                       create_subjects_link=project['create_subjects_link'],
                       entity_type=project['entity_type'],
                       encoded_name=project['encoded_name'],
                       import_template_file_name=project['import_template_file_name']
        )

        project_list.append(project)
    project_list.sort(key=itemgetter('name'))
    activation_success = request.GET.get('activation', False)

    error_messages = []
    if "associate" in request.GET.keys():
        error_messages = [_('You may have been dissociated from the project. Please contact your administrator for more details.')]
    if is_crs_admin(request):
        return render_to_response('alldata/index.html',
                                  {'projects': project_list, 'page_heading': page_heading,
                                   'disable_link_class': disable_link_class,
                                   'hide_link_class': hide_link_class, 'is_crs_admin': True,
                                   'project_links': get_alldata_project_links(),
                                   'is_quota_reached':is_quota_reached(request),
                                   'error_messages': error_messages,
                                   'activation_success': activation_success},
                                  context_instance=RequestContext(request))
    else:
        return render_to_response('alldata/index.html',
                                  {'projects': project_list, 'page_heading': page_heading,
                                   'disable_link_class': disable_link_class,
                                   'hide_link_class': hide_link_class, 'is_crs_admin': False,
                                   "smart_phone_instruction_link": smart_phone_instruction_link,
                                   'project_links': get_alldata_project_links(),
                                   'is_quota_reached':is_quota_reached(request),
                                   'error_messages': error_messages,
                                   'activation_success': activation_success},
                                  context_instance=RequestContext(request))
Example #28
def get_project_info(manager, project):
    project_id = project['_id']
    questionnaire = Project.new_from_doc(manager,
                                         ProjectDocument.wrap(project))
    questionnaire_code = questionnaire.form_code

    analysis, disabled, log = get_project_analysis_and_log_link(
        project_id, questionnaire_code)

    web_submission_link = reverse("web_questionnaire", args=[project_id])

    web_submission_link_disabled = 'disable_link'
    if 'web' in project['devices']:
        web_submission_link_disabled = ""

    create_subjects_links = {}
    for entity_type in questionnaire.entity_type:
        create_subjects_links.update({
            entity_type:
            append_query_strings_to_url(reverse("subject_questionnaire",
                                                args=[project_id,
                                                      entity_type]),
                                        web_view=True)
        })
    if questionnaire.is_poll:
        project_link = reverse("submissions",
                               args=[project_id, questionnaire_code])
    else:
        project_link = reverse('project-overview', args=[project_id])

    project_info = dict(
        project_id=project_id,
        _id=project_id,
        name=project['name'],
        qid=questionnaire_code,
        created=project['created'],
        is_advanced_questionnaire=bool(project.get('xform')),
        link=project_link,
        log=log,
        analysis=analysis,
        disabled=disabled,
        web_submission_link=web_submission_link,
        web_submission_link_disabled=web_submission_link_disabled,
        create_subjects_link=create_subjects_links,
        entity_type=questionnaire.entity_type,
        encoded_name=urlquote(project['name']),
        import_template_file_name=slugify(project['name']),
        is_poll=bool(questionnaire.is_poll),
        is_project_manager=project.get('is_project_manager', False))
    return project_info
Example #29
    def __init__(self, filename):
        '''
        filename: inits the UBRR data from the input file
        '''

        ub_map = dict()
        ub_ratings = dict()

        cnt = 0

        #read the file
        if filename.endswith('.gz'):
            f = gzip.open(filename, 'r')
        else:
            f = open(filename, 'r')

        for line in f:
            vals = line.split("\t")
            if len(vals) == 0:
                continue

            u = vals[0]
            b = vals[1]
            r = float(vals[2])
            d = vals[3].strip()

            ub_map[(u, b)] = self._int_list(d)
            ub_ratings[(u, b)] = r

            cnt += 1

        self.user_item_map = ub_map
        self.user_item_rating = ub_ratings

        f.close()
        print 'Data Pair Manager Initialized with ', cnt, ' reviews'
Example #30
 def __init__(self, file):
     fileRead = open(file, 'r')
     self.delta = dict()
     definitions = fileRead.read().replace(' ', '').replace('\n', '').replace('\t', '').split('.')
     del definitions[-1]
     for definition in definitions:
         if (definition.split(':')[0][0] == 'T'):
             transitionsString = definition.replace(':=','=').split('T:')[1].replace('},','*').replace('}','*').split('*')
             for transition in transitionsString:
                 if (transition != ''):
                     transition = transition.split('={')
                     state = transition[0][0]
                     simbol = transition[0][2]
                     targets = set(transition[1].split(','))
                     deltaAux = dict([(simbol,targets)])
                     if self.delta.has_key(state):
                         self.delta[state][simbol] = targets
                     else:
                         self.delta[state] = deltaAux
         elif (definition.split(':')[0][0] == 'I'):
             self.q0 = definition.split(':')[1].split(',')[0]
         elif (definition.split(':')[0][0] == 'F'):
             self.F = definition.split(':')[1].split(',')
     fileRead.close()
Example #31
def extract_word_clusters(commentList, commentCount):
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    a, corpus, global_synsets = extract_global_bag_of_words(commentList, True)
    similarity_dict = {}
    i = 0
    t = len(global_synsets)**2
    
    for syn_out in global_synsets:
        similarity_dict[syn_out] = {} 
        for syn_in in global_synsets:
            if syn_in.pos() == syn_out.pos():
                similarity_dict[syn_out][syn_in] = syn_out.lin_similarity(syn_in, brown_ic)
            else:
                similarity_dict[syn_out][syn_in] = max(wn.path_similarity(syn_out,syn_in), wn.path_similarity(syn_in,syn_out))
        
            if i % 10000 == 0:
                print i, 'synsets processed out of',len(global_synsets)**2, '(',float(i)/(t),'%)'
            i += 1

    tuples = [(i[0], i[1].values()) for i in similarity_dict.items()] 
    vectors = [np.array(tup[1]) for tup in tuples]

    
    # Rule of thumb for choosing k; KMeans needs an integer cluster count
    n = int(sqrt(len(global_synsets) / 2))
    print "Number of clusters", n
    km_model = KMeans(n_clusters=n)
    km_model.fit(vectors)
    
    clustering = collections.defaultdict(list)
    for idx, label in enumerate(km_model.labels_):
        clustering[label].append(tuples[idx][0])
        
    pprint.pprint(dict(clustering), width=1)
    
    feature_vector = np.zeros([len(corpus),n])
    
    for i,comment in enumerate(corpus):
        for w in comment:
            for key, clust in clustering.items():
                if w in clust:
                    feature_vector[i][key] += 1
        if i % 1000 == 0:
            print i, 'comments processed'
        
    print feature_vector
Example #32
    def dict_processor(data):
        if not isinstance(data, __builtin__.dict) and not coerce_:
            raise DataTypeError('dict')
        else:
            try:
                data = __builtin__.dict(data)
            except (TypeError, ValueError):
                raise DataTypeError('dict')

        cleandata = {}
        errors = {}
        seen = set()
        for name, value in data.items():
            try:
                if name in procs:
                    cleandata[name] = procs[name].process(value)
                    seen.add(name)
                elif ignore_extra:
                    pass
                elif pass_extra:
                    cleandata[name] = value
                else:
                    raise ExtraDataError()
            except CheckerError as ex:
                if capture_all_errors:
                    errors[name] = unicode(ex)
                else:
                    ex.field = name
                    raise ex

        if not ignore_missing:
            for name in (set(procs.keys()) - seen):
                try:
                    cleandata[name] = procs[name].process(None)
                except CheckerError as ex:
                    if capture_all_errors:
                        errors[name] = unicode(ex)
                    else:
                        ex.field = name
                        raise ex

        if errors:
            raise DictionaryError(errors)

        return cleandata
Example #33
    def as_dict(self):
        house = __builtin__.dict(Id=self.Id,
                                 LandlordId=self.LandlordId,
                                 Address1=self.Address1,
                                 Address2=self.Address2,
                                 City=self.City,
                                 State=self.State,
                                 Zipcode=self.Zipcode,
                                 Rooms=self.Rooms,
                                 ParkingSpots=self.ParkingSpots,
                                 MonthlyRent=self.MonthlyRent,
                                 UtilitiesIncluded=self.UtilitiesIncluded,
                                 Laundry=self.Laundry,
                                 Pets=self.Pets,
                                 Latitude=self.Latitude,
                                 Longitude=self.Longitude,
                                 DistFromCC=self.DistFromCC,
                                 DateAvailable=str(self.DateAvailable),
                                 LeaseTerm=self.LeaseTerm)

        return house
Example #34
 def displayExperimentSessionSettingsDialog(self):
     """
     Display an editable dialog showing the experiment session settings retrieved from the configuration file.
     This includes the few mandatory ioHub experiment session attributes, as well as any user-defined experiment session
     attributes that have been defined in the experiment configuration file. If OK is selected in the dialog,
     the experiment logic continues, otherwise the experiment session is terminated.
     """
     allSessionDialogVariables = dict(self.experimentSessionDefaults, **self.sessionUserVariables)
     sessionVariableOrder=self.configuration['session_variable_order']
     if 'user_variables' in allSessionDialogVariables:
         del allSessionDialogVariables['user_variables']
     
     sessionDlg=psychopy.gui.DlgFromDict(allSessionDialogVariables, 'Experiment Session Settings', [], sessionVariableOrder)
     
     if sessionDlg.OK:
         for key,value in allSessionDialogVariables.iteritems():
             if key in self.experimentSessionDefaults:
                 self.experimentSessionDefaults[key]=str(value)
             elif  key in self.sessionUserVariables:
                 self.sessionUserVariables[key]=str(value)   
         return False
     return True
Example #35
def create_record_categorical(model, y, cv, names, class_names, conf_matrix_list, oob_estimates, baseline=None):
    
    txt_time = str(datetime.datetime.now())

    saved_params = {}
    try:
        params = model.get_params()
        for k in params:
            if type(params[k]) in (str,int,np.array,list,dict):
                try:
                    json.dumps(params[k])                
                    saved_params[k] = params[k]
                except TypeError:
                    saved_params[k] = repr(params[k])
    except:
        pass    

    cv_full = [[train,test] for (train,test) in cv]
    y_label = one_hot_to_label(y)
    
    samples = [{'name': names[i],
                'prob_label': float(p[y_label[i]]),
                'prob_pred': float(p[np.argmax(p)]),
                'pred': float(np.argmax(p)),
                'label': float(y_label[i]),
                'class_label': class_names[y_label[i]],
                'class_pred': class_names[np.argmax(p)]}
               for i, p in oob_estimates.iteritems()]
    samples = sorted(samples, key=lambda v:  v['prob_label'])
    samples_predicted = sorted(samples, key=lambda v:  -v['prob_label'])
        
    #get the classification report
    y_pred = []
    y_label = []
    for sample in samples:
        y_pred.append(sample['pred'])
        y_label.append(sample['label'])
    y_pred = np.array(y_pred)
    y_label = np.array(y_label)
    
    weighted_f1 = f1_score(y_label, y_pred, average='weighted')  
    
    conf_matrix = confusion_matrix(y_label, y_pred, labels=range(len(class_names)))
    conf_matrix = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]

    cmap_viridis =  matplotlib.cm.get_cmap('viridis')
    fig_size = plt.gcf().get_size_inches()
    plt.figure(figsize=fig_size*3)
    plt.imshow(conf_matrix, interpolation='nearest', cmap=cmap_viridis)
    plt.title('CM, Train: %d, Test: %d, cv: %d, F1: %0.4f' % (len(cv_full[0][0]), len(cv_full[0][1]), len(cv_full), weighted_f1))
    plt.clim(0,1)
    plt.colorbar()
    
    if len(class_names)<200:
        tick_marks = np.arange(len(class_names))    
        plt.xticks(tick_marks, class_names, rotation=90)
        plt.yticks(tick_marks, class_names)
        plt.grid(True)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
    #store the output
    store = dict()
    store['params'] = saved_params
    store['weighted_f1'] = weighted_f1
    store['size'] = len(cv_full[0][0])+len(cv_full[0][1])
    store['cv_size'] = len(cv_full)
    store['cv_train'] = len(cv_full[0][0])
    store['cv_test'] = len(cv_full[0][1])
            
    store['report'] = classification_report(y_label, y_pred, labels=range(len(class_names)), target_names=class_names, digits=3)
    store['top_missed'] = samples[0:200]
    store['top_predicted'] = samples_predicted[0:200]
    
    #print the report
    print store['report']
    
    val_results = {}
    val_results['oob_estimates'] = samples
    val_results['conf_matrix'] = conf_matrix.tolist()
    
    img_results = {}

    buf = io.BytesIO()    
    plt.savefig(buf)
    img_results['confusion_matrix'] = buf
    
    results = {}
    results['time'] = txt_time
    results['output'] = store
    results['validation'] = val_results
    results['images'] = img_results

    plt.close()

    return results
Example #36
def create_record_regression(model, y, cv, names, loss_array, oob_estimates):
        
    txt_time = str(datetime.datetime.now())
    
    loss_values = oob_estimates.values()
    loss = np.mean(loss_values)
    
    plt.hist(loss_values, 200, weights=np.ones(len(loss_values))/len(loss_values), label='Loss Distribution: loss={}'.format(loss))
    
    cv_full = [[train,test] for (train,test) in cv]
    
    plt.xlabel('Loss')
    plt.ylabel('Fraction')
    plt.title('Loss, Training: %d, Testing: %d, cv: %d' % (len(cv_full[0][0]), len(cv_full[0][1]), len(cv_full)))
    plt.legend(loc="upper center")
    plt.tight_layout()
    #plt.show()
        
    saved_params = {}
    try:
        params = model.get_params()
        for k in params:
            if type(params[k]) in (str,int,np.array,list,dict):
                try:
                    json.dumps(params[k])                
                    saved_params[k] = params[k]
                except TypeError:
                    saved_params[k] = repr(params[k])
    except:
        pass    
    
    #store the output
    store = dict()
    store['params'] = saved_params
    store['loss'] = float(loss)
    store['cv_size'] = len(cv_full)
    store['size'] = len(cv_full[0][0])+len(cv_full[0][1])
    store['benign_size'] = float((y==0).sum())
    store['malware_size'] = float((y==1).sum())
    store['cv_train'] = len(cv_full[0][0])
    store['cv_test'] = len(cv_full[0][1])
    
    loss_store = [{'name': names[i], 'loss': float(p)} for i, p in oob_estimates.iteritems()]
    loss_store = sorted(loss_store, key=lambda v: -v['loss'])

    store['loss_top'] = loss_store[0:500]
    store['loss_bottom'] = loss_store[-500:-1]
    
    val_results = {}
    val_results['oob_estimates'] = loss_store
            
    #save the model
    img_results = {}
    
    buf = io.BytesIO()    
    plt.savefig(buf)
    img_results['img_loss'] = buf
    
    results = {}
    results['time'] = txt_time
    results['output'] = store
    results['validation'] = val_results
    results['images'] = img_results
    
    plt.close()

    return results
Example #37
 def closure(*ap, **kp):
     A, K = a+ap, sortedtuple(k.items() + kp.items())
     return state[(A,K)] if (A,K) in state else state.setdefault((A,K), f(*A, **__builtin__.dict(k.items()+kp.items())))
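
This closure is the body of a memoizer: the positional and keyword arguments are folded into a hashable key (A, K), and the wrapped f runs at most once per distinct key (sortedtuple is assumed to turn the merged keyword items into a sorted tuple so they hash consistently). A sketch of the whole mechanism with a plain dict as the cache:

    def lazy(f, *a, **k):
        state = {}
        def closure(*ap, **kp):
            A, K = a + ap, tuple(sorted(k.items() + kp.items()))
            return state[(A, K)] if (A, K) in state else state.setdefault(
                (A, K), f(*A, **dict(k.items() + kp.items())))
        return closure

    calls = []
    def slow_add(x, y):
        calls.append((x, y))
        return x + y

    cached = lazy(slow_add)
    print cached(2, 3), cached(2, 3)        # 5 5
    print calls                             # [(2, 3)] -- f ran only once
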
Example #38
def generate_and_push_new_documentation_page(
    temporary_documentation_folder,
    distribution_bundle_file,
    has_api_documentation,
    temporary_documentation_node_modules_directory
):
# #
    '''
        Renders a new index.html file and copies new assets to generate a new \
        documentation homepage.
    '''
    global BUILD_DOCUMENTATION_PAGE_COMMAND
    __logger__.info('Update documentation design.')
    if distribution_bundle_file:
        new_distribution_bundle_file = FileHandler(location='%s%s%s' % (
            temporary_documentation_folder.path, DOCUMENTATION_BUILD_PATH,
            DISTRIBUTION_BUNDLE_FILE_PATH))
        new_distribution_bundle_file.directory.make_directories()
        distribution_bundle_file.path = new_distribution_bundle_file
        new_distribution_bundle_directory = FileHandler(location='%s%s%s' % (
            temporary_documentation_folder.path, DOCUMENTATION_BUILD_PATH,
            DISTRIBUTION_BUNDLE_DIRECTORY_PATH))
        new_distribution_bundle_directory.make_directories()
        zipfile.ZipFile(distribution_bundle_file.path).extractall(
            new_distribution_bundle_directory.path)
    favicon = FileHandler(location='favicon.png')
    if favicon:
        favicon.copy(target='%s/source/image/favicon.ico' %
            temporary_documentation_folder.path)
    parameter = builtins.dict(builtins.map(lambda item: (
        String(item[0]).camel_case_to_delimited.content.upper(), item[1]
    ), SCOPE.get('documentationWebsite', {}).items()))
    if 'TAGLINE' not in parameter and 'description' in SCOPE:
        parameter['TAGLINE'] = SCOPE['description']
    if 'NAME' not in parameter and 'name' in SCOPE:
        parameter['NAME'] = SCOPE['name']
    __logger__.debug('Found parameter "%s".', json.dumps(parameter))
    api_documentation_path = None
    if has_api_documentation:
        api_documentation_path = '%s%s' % (
            API_DOCUMENTATION_PATH[1], API_DOCUMENTATION_PATH_SUFFIX)
        if not FileHandler(location='%s%s' % (
            FileHandler().path, api_documentation_path
        )).is_directory():
            api_documentation_path = API_DOCUMENTATION_PATH[1]
    parameter.update({
        'CONTENT': CONTENT,
        'CONTENT_FILE_PATH': None,
        'RENDER_CONTENT': False,
        'API_DOCUMENTATION_PATH': api_documentation_path,
        'DISTRIBUTION_BUNDLE_FILE_PATH': DISTRIBUTION_BUNDLE_FILE_PATH if (
            distribution_bundle_file and
            distribution_bundle_file.is_file()
        ) else None
    })
# # python3.5
# #     parameter = Dictionary(parameter).convert(
# #         value_wrapper=lambda key, value: value.replace(
# #             '!', '#%%%#'
# #         ) if builtins.isinstance(value, builtins.str) else value
# #     ).content
    parameter = Dictionary(parameter).convert(
        value_wrapper=lambda key, value: value.replace(
            '!', '#%%%#'
        ) if builtins.isinstance(value, builtins.unicode) else value
    ).content
# #
    if __logger__.isEnabledFor(logging.DEBUG):
        BUILD_DOCUMENTATION_PAGE_COMMAND = \
            BUILD_DOCUMENTATION_PAGE_COMMAND[:-1] + [
                '-debug'
            ] + BUILD_DOCUMENTATION_PAGE_COMMAND[-1:]
    serialized_parameter = json.dumps(parameter)
    parameter_file = FileHandler(location=make_secure_temporary_file('.json')[
        1])
    parameter_file.content = \
        BUILD_DOCUMENTATION_PAGE_PARAMETER_TEMPLATE.format(
            serializedParameter=serialized_parameter, **SCOPE)
    for index, command in builtins.enumerate(BUILD_DOCUMENTATION_PAGE_COMMAND):
        BUILD_DOCUMENTATION_PAGE_COMMAND[index] = \
            BUILD_DOCUMENTATION_PAGE_COMMAND[index].format(
                serializedParameter=serialized_parameter,
                parameterFilePath=parameter_file._path,
                **SCOPE)
    __logger__.debug('Use parameter "%s".', serialized_parameter)
    __logger__.info('Run "%s".', ' '.join(BUILD_DOCUMENTATION_PAGE_COMMAND))
    current_working_directory_backup = FileHandler()
    temporary_documentation_folder.change_working_directory()
    Platform.run(
        command=BUILD_DOCUMENTATION_PAGE_COMMAND[0],
        command_arguments=BUILD_DOCUMENTATION_PAGE_COMMAND[1:], error=False,
        log=True)
    current_working_directory_backup.change_working_directory()
    parameter_file.remove_file()
    for file in FileHandler():
        if not (file in (temporary_documentation_folder, FileHandler(
            location='.%s' % API_DOCUMENTATION_PATH[1]
        )) or is_file_ignored(file)):
            file.remove_deep()
    documentation_build_folder = FileHandler(location='%s%s' % (
        temporary_documentation_folder.path, DOCUMENTATION_BUILD_PATH
    ), must_exist=True)
    documentation_build_folder.iterate_directory(
        function=copy_repository_file, recursive=True,
        source=documentation_build_folder, target=FileHandler())
    if (Platform.run(
        "/usr/bin/env sudo umount '%s'" %
            temporary_documentation_node_modules_directory.path,
        native_shell=True, error=False, log=True
    )['return_code'] == 0):
        temporary_documentation_folder.remove_deep()
    Platform.run(
        (
            '/usr/bin/env git add --all',
            '/usr/bin/env git commit --message "%s" --all' %
                PROJECT_PAGE_COMMIT_MESSAGE,
            '/usr/bin/env git push',
            '/usr/bin/env git checkout master'
        ),
        native_shell=True,
        error=False,
        log=True
    )
Example #39
def dict(*a, **k):
    import __builtin__
    warnings.warn('twisted.python.util.dict is deprecated.  Use __builtin__.dict instead')
    return __builtin__.dict(*a, **k)
Example #40
if sys.platform == 'win32':
    __import__('msvcrt').setmode(sys.stdout.fileno(), os.O_BINARY) if hasattr(sys.stdout, 'fileno') else None
    __import__('msvcrt').setmode(sys.stderr.fileno(), os.O_BINARY) if hasattr(sys.stderr, 'fileno') else None

# use the current virtualenv if it exists
builtins._ = os.path.join(user.home.replace('\\', os.sep).replace('/', os.sep), '.python-virtualenv', 'Scripts' if __import__('platform').system() == 'Windows' else 'bin', 'activate_this.py')
if os.path.exists(builtins._): execfile(builtins._, {'__file__':builtins._})

# add ~/.python/* to python module search path
map(sys.path.append, __import__('glob').iglob(os.path.join(user.home.replace('\\', os.sep).replace('/', os.sep), '.python', '*')))

## some functional primitives in the default namespace
# box any specified arguments
fbox = fboxed = lambda *a: a
# return a closure that executes ``f`` with the arguments unboxed.
funbox = lambda f, *a, **k: lambda *ap, **kp: f(*(a + builtins.reduce(operator.add, builtins.map(builtins.tuple, ap), ())), **builtins.dict(k.items() + kp.items()))
# return a closure that will check that its argument is an instance of ``type``.
finstance = lambda *type: frpartial(builtins.isinstance, type)
# return a closure that will check if its argument has an item ``key``.
fhasitem = fitemQ = lambda key: fcompose(fcatch(frpartial(operator.getitem, key)), builtins.iter, builtins.next, fpartial(operator.eq, builtins.None))
# return a closure that will get a particular element from an object
fgetitem = fitem = lambda item, *default: lambda object: default[0] if default and item not in object else object[item] 
# return a closure that will check if its argument has an ``attribute``.
fhasattr = fattributeQ = lambda attribute: frpartial(builtins.hasattr, attribute)
# return a closure that will get a particular attribute from an object
fgetattr = fattribute = lambda attribute, *default: lambda object: getattr(object, attribute, *default)
# return a closure that always returns ``object``.
fconstant = fconst = falways = lambda object: lambda *a, **k: object
# a closure that always returns its argument
fpassthru = fpass = fidentity = fid = lambda object: object
# a closure that returns a default value if its object is false-y
Example #41
 def __init__(self, id, name, methods):
     self.id = id
     self.name = name
     self.methods = __builtin__.dict([(m.id, m) for m in methods])
     registry.current_registry.register_class(self)
Example #42
    def __init__(self, configFilePath, configFile):
        """
        Initialize the SimpleIOHubRuntime Object, loading the experiment configuration file, initializing and launching
        the ioHub server process, and creating the client side device interface to the ioHub devices that have been created.

        Currently the ioHub timer uses a ctypes implementation of direct access to the Windows QPC functions on win32
        (so no python interpreter start time offset is applied between processes), and timeit.default_timer is used for
        all other platforms at this time. The advantage of not having a first-read offset applied per python interpreter is
        that both the psychopy process and the ioHub process use the exact same timebase without a differing
        offset that is hard to determine exactly due to the variability in IPC request-responses. With the two processes
        using the exact same time space, including offset, getTime() for the ioHub client in psychopy == the current time
        of the ioHub server process, greatly simplifying some aspects of synchronization. This only holds as long as both
        processes are running on the same PC, of course.

        Note on timeit.default_timer: As of 2.7, timeit.default_timer correctly selects the best clock based on OS for
        high-precision timing. Before 2.7, you need to check the OS version yourself and select one; or use the psychopy
        clocks, since they do the work for you. ;)

        Args:
            configFilePath (str): The absolute path to the experiment configuration .yaml file, which is automatically assigned
            to the path the experiment script is running from by default.
            configFile (str): The name of the experiment configuration .yaml file, which has a default value of 'experiment_config.yaml'

            Return: None
        """

        self.currentTime=computer.currentSec

        self.configFilePath=configFilePath
        self.configFileName=configFile

        self.fullPath= os.path.join(self.configFilePath,self.configFileName)

        # load the experiment config settings from the experiment_config.yaml file.
        # The file must be in the same directory as the experiment script.
        self.configuration=load(file(self.fullPath,u'r'), Loader=Loader)

        self.experimentConfig=dict()
        self._experimentConfigKeys=['title','code','version','description','total_sessions_to_run']
        for key in self._experimentConfigKeys:
            if key in self.configuration:
                self.experimentConfig[key]=self.configuration[key]
 
        self.experimentSessionDefaults=self.configuration['session_defaults']
        self.sessionUserVariables=self.experimentSessionDefaults['user_variables']
        del self.experimentSessionDefaults['user_variables']
        
        # self.hub will hold the reference to the ioHubClient object, used to access the ioHubServer
        # process and devices.
        self.hub=None
        # holds events collected from the ioHub during periods like msecWait()
        self.allEvents=None
        
        # indicates if the experiment is in high priority mode or not. Do not set directly.
        # See enableHighPriority() and disableHighPriority()
        self._inHighPriorityMode=False

        self.sysutil=ioHub.devices.computer

        # initialize the experiment object based on the configuration settings.
        self._initalizeConfiguration()
Example #43
import sys,os,itertools,operator,functools,user,__builtin__

# use the current virtualenv if it exists
__builtin__._=os.path.join(user.home.replace('\\',os.sep).replace('/',os.sep),'.python-virtualenv','Scripts' if __import__('platform').system() == 'Windows' else 'bin', 'activate_this.py')
if os.path.exists(__builtin__._): execfile(__builtin__._,{'__file__':__builtin__._})

# add ~/.python/* to python module search path
map(sys.path.append,__import__('glob').iglob(os.path.join(user.home.replace('\\',os.sep).replace('/',os.sep),'.python','*')))

## include some functional primitives in the default namespace

# box any specified arguments
box = lambda *a: a
# return a closure that executes ``f`` with the arguments unboxed.
unbox = lambda f, *a, **k: lambda *ap, **kp: f(*(a + __builtin__.reduce(operator.add, __builtin__.map(__builtin__.tuple,ap), ())), **__builtin__.dict(k.items() + kp.items()))
# return a closure that always returns ``n``.
identity = lambda n: lambda *a, **k: n
# return the first, second, or third item of a box.
first, second, third = operator.itemgetter(0), operator.itemgetter(1), operator.itemgetter(2)
# return a closure that executes a list of functions one after another from left-to-right
fcompose = compose = lambda *f: __builtin__.reduce(lambda f1,f2: lambda *a: f1(f2(*a)), __builtin__.reversed(f))
# return a closure that executes function ``f`` whilst discarding any extra arguments
fdiscard = lambda f: lambda *a, **k: f()
# return a closure that executes function ``crit`` and then executes ``f`` or ``t`` based on whether or not it's successful.
fcondition = lambda f, t: lambda crit: lambda *a, **k: t(*a, **k) if crit(*a, **k) else f(*a, **k)
# return a closure that takes a list of functions to execute with the provided arguments
fmaplist = fap = lambda *fa: lambda *a, **k: (f(*a, **k) for f in fa)
#lazy = lambda f, state={}: lambda *a, **k: state[(f,a,__builtin__.tuple(__builtin__.sorted(k.items())))] if (f,a,__builtin__.tuple(__builtin__.sorted(k.items()))) in state else state.setdefault((f,a,__builtin__.tuple(__builtin__.sorted(k.items()))), f(*a, **k))
#lazy = lambda f, *a, **k: lambda *ap, **kp: f(*(a+ap), **dict(k.items() + kp.items()))
# return a memoized closure that's lazy and only executes when evaluated
def lazy(f, *a, **k):
Example #44
 def decoder(stream):
     return model(**__builtin__.dict([(name, type(stream)) for name, type in parts]))
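
Here parts is expected to be a sequence of (name, decoder) pairs, each decoder pulling one typed value off the stream; the decoded fields are then splatted into the model constructor. A sketch with hypothetical decoders reading from a plain iterator:

    parts = (('x', lambda s: int(s.next())), ('y', lambda s: float(s.next())))
    stream = iter(['3', '2.5'])
    fields = dict((name, decode(stream)) for name, decode in parts)
    print fields['x'], fields['y']          # 3 2.5
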
Example #45
    def __init__(self, configFilePath, configFile):
        """
        Initialize the SimpleIOHubRuntime Object, loading the experiment configuration file, initializing and launching
        the ioHub server process, and creating the client side device interface to the ioHub devices that have been created.

        Currently the ioHub timer uses a ctypes implementation of direct access to the Windows QPC functions on win32
        (so no python interpreter start time offset is applied between processes), and timeit.default_timer is used for
        all other platforms at this time. The advantage of not having a first-read offset applied per python interpreter is
        that both the psychopy process and the ioHub process use the exact same timebase without a differing
        offset that is hard to determine exactly due to the variability in IPC request-responses. With the two processes
        using the exact same time space, including offset, getTime() for the ioHub client in psychopy == the current time
        of the ioHub server process, greatly simplifying some aspects of synchronization. This only holds as long as both
        processes are running on the same PC, of course.

        Note on timeit.default_timer: As of 2.7, timeit.default_timer correctly selects the best clock based on OS for
        high-precision timing. Before 2.7, you need to check the OS version yourself and select one; or use the psychopy
        clocks, since they do the work for you. ;)

        Args:
            configFilePath (str): The absolute path to the experiment configuration .yaml file, which is automatically assigned
            to the path the experiment script is running from by default.
            configFile (str): The name of the experiment configuration .yaml file, which has a default value of 'experiment_config.yaml'

            Return: None
        """

        self.currentTime = computer.currentSec

        self.configFilePath = configFilePath
        self.configFileName = configFile

        self.fullPath = os.path.join(self.configFilePath, self.configFileName)

        # load the experiment config settings from the experiment_config.yaml file.
        # The file must be in the same directory as the experiment script.
        self.configuration = load(file(self.fullPath, u'r'), Loader=Loader)

        self.experimentConfig = dict()
        self._experimentConfigKeys = [
            'title', 'code', 'version', 'description', 'total_sessions_to_run'
        ]
        for key in self._experimentConfigKeys:
            if key in self.configuration:
                self.experimentConfig[key] = self.configuration[key]

        self.experimentSessionDefaults = self.configuration['session_defaults']
        self.sessionUserVariables = self.experimentSessionDefaults[
            'user_variables']
        del self.experimentSessionDefaults['user_variables']
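        # sketch of the expected YAML layout (an assumption inferred from the
        # keys read above, not the documented ioHub schema):
        #
        #     title: My Experiment
        #     code: EXP01
        #     version: '1.0'
        #     description: ...
        #     total_sessions_to_run: 10
        #     session_defaults:
        #         user_variables:
        #             my_var: default_value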

        # self.hub will hold the reference to the ioHubClient object, used to access the ioHubServer
        # process and devices.
        self.hub = None
        # holds events collected from the ioHub during periods like msecWait()
        self.allEvents = None

        # indicates if the experiment is in high priority mode or not. Do not set directly.
        # See enableHighPriority() and disableHighPriority()
        self._inHighPriorityMode = False

        self.sysutil = ioHub.devices.computer

        # initialize the experiment object based on the configuration settings.
        self._initalizeConfiguration()
Exemple #46
0
import cherrypy
import signal
from model.template import Template
from __builtin__ import dict
import os
import controller.RootController


def shutdown(signum, frame):
    print "try shutdown"
    cherrypy.server.stop()


signal.signal(signal.SIGINT, shutdown)

config = dict()
site_config = dict()
config['log.error_file'] = 'err.log'  # error log file
cherrypy.config.update(config)
config.clear()
conf = {
    '/': {
        'tools.staticdir.root': os.getcwd()
    },
    '/static': {
        'tools.staticdir.on': True,
        'tools.staticdir.dir': 'static',

        # we don't need to initialize the database for static files served by CherryPy
        # 'tools.db.on': False
    }
}
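# the scraped snippet is truncated here; a plausible continuation (my
# assumption, not the original source) would mount the root controller:
#   cherrypy.quickstart(controller.RootController.RootController(), '/', conf)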
Exemple #47
0
 def decoder(stream):
     return __builtin__.dict([(key(stream), value(stream)) for i in range(int(stream))])
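# a runnable sketch of this decoder-combinator style (my assumption about the
# surrounding framework: ``int``, ``key`` and ``value`` above are themselves
# decoders that consume items from ``stream``, not the builtins):
def _take(items):
    it = iter(items)
    return lambda: next(it)          # toy stream: each call yields one raw item

_int = lambda stream: stream()       # decoder for the pair count
_key = lambda stream: stream()       # decoder for one key
_value = lambda stream: stream()     # decoder for one value

_s = _take([2, 'a', 1, 'b', 2])      # a count of 2, then the pairs ('a', 1) and ('b', 2)
assert dict([(_key(_s), _value(_s)) for _ in range(_int(_s))]) == {'a': 1, 'b': 2}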
Exemple #48
0
def create_record(model, y, cv, names, fpr_array, tpr_array, thresh_array, oob_estimates, baseline=None):
    
    txt_time = str(datetime.datetime.now())

    final_prediction = np.matrix([[float(y[i]), float(p)] for i, p in oob_estimates.iteritems()])
    fpr, tpr, thresh = roc_curve(final_prediction[:,0], final_prediction[:,1], 1)
    curr_auc = auc(fpr, tpr)

    # compute the shading over a large number of points
    sp = 1000
    fpr_points = np.concatenate([fpr, np.logspace(-6,-5, sp), np.linspace(1e-5, 1e-4, sp), np.linspace(1e-4, 1e-3, sp), np.linspace(1e-3, 1e-2, sp), np.linspace(1e-2, 1, sp)])
    fpr_points = np.sort(fpr_points)
    mean_fpr, mean_tpr, std_tpr = compute_stat_cv(fpr_array, tpr_array, fpr_points)

    # indices of the ROC points closest to each FPR cutoff of interest
    idx_1e2 = (np.abs(fpr-1e-2)).argmin()
    idx_1e3 = (np.abs(fpr-1e-3)).argmin()
    idx_1e4 = (np.abs(fpr-1e-4)).argmin()

    # partial AUC up to each FPR cutoff, rescaled onto [0, 1] by dividing by the cutoff width
    auc_1e2 = integrate.trapz(tpr[:idx_1e2], fpr[:idx_1e2])*1e2
    auc_1e3 = integrate.trapz(tpr[:idx_1e3], fpr[:idx_1e3])*1e3
    auc_1e4 = integrate.trapz(tpr[:idx_1e4], fpr[:idx_1e4])*1e4
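    # worked check of the rescaling above (my note): integrating a perfect
    # TPR of 1.0 over fpr in [0, 1e-3] gives 1e-3, so the *1e3 factor maps
    # each partial AUC onto [0, 1], e.g.
    #   integrate.trapz(np.ones(10), np.linspace(0, 1e-3, 10)) * 1e3  # == 1.0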
    
    #plt.semilogx(mean_fpr, mean_tpr, 'k-',  label='Mean ROC (area = %0.3f, tpr = %0.3f)' % (mean_auc, mean_tpr[idx_1e3]))
    #plt.xlim([1.0e-4, 1.0])
    
    if baseline is None:
        plt.plot(np.logspace(-10,0, 1000), np.logspace(-10,0, 1000), 'k--')
    else:
        plt.plot(baseline[0], baseline[1],'k--')
    
    plt.fill_between(mean_fpr, mean_tpr - std_tpr, mean_tpr + std_tpr, alpha=.4, label='Mean TPR +/- 1 std. dev.')
    
    plt.step(fpr, tpr, 'k-', label='ROC (AUC = %0.6f, AUC_1e-3 = %0.6f, TPR_1e-4 = %0.6f, TPR_1e-3 = %0.6f)' % (curr_auc, auc_1e3, tpr[idx_1e4], tpr[idx_1e3]))
    
    cv_full = [[train,test] for (train,test) in cv]
    
    plt.xlim([0, 1.0])
    plt.ylim([0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC, Training: %d, Testing: %d, cv: %d' % (len(cv_full[0][0]), len(cv_full[0][1]), len(cv_full)))
    plt.legend(loc="lower right", prop={'size':8})
    plt.tight_layout()
    #plt.show()
        
    saved_params = {}
    try:
        params = model.get_params()
        for k in params:
            if isinstance(params[k], (str, int, np.ndarray, list, dict)):
                try:
                    json.dumps(params[k])
                    saved_params[k] = params[k]
                except TypeError:
                    # not JSON-serializable (e.g. ndarray); fall back to its repr
                    saved_params[k] = repr(params[k])
    except Exception:
        # model may not implement get_params(); record no parameters in that case
        pass
    
    #store the output
    store = dict()
    store['params'] = saved_params
    store['roc'] = np.column_stack((fpr, tpr, thresh)).tolist()
    #store['std_tpr'] = std_tpr.tolist()
    store['auc'] = float(curr_auc)
    store['tpr_1e2'] = float(mean_tpr[idx_1e2])
    store['auc_1e2'] = float(auc_1e2)
    store['tpr_1e3'] = float(mean_tpr[idx_1e3])
    store['auc_1e3'] = float(auc_1e3)
    store['tpr_1e4'] = float(mean_tpr[idx_1e4])
    store['auc_1e4'] = float(auc_1e4)
    store['cv_size'] = len(cv_full)
    store['size'] = len(cv_full[0][0])+len(cv_full[0][1])
    store['benign_size'] = int((y==0).sum())
    store['malware_size'] = int((y==1).sum())
    store['cv_train'] = len(cv_full[0][0])
    store['cv_test'] = len(cv_full[0][1])
    
    pos = [{'name': names[i], 'p': float(p), 'label': float(y[i])} for i, p in oob_estimates.iteritems() if y[i] == 1]
    neg = [{'name': names[i], 'p': float(p), 'label': float(y[i])} for i, p in oob_estimates.iteritems() if y[i] == 0]

    pos = sorted(pos, key=lambda v: v['p'])
    neg = sorted(neg, key=lambda v: -v['p'])

    store['top_fp'] = neg[:500]
    store['top_fn'] = pos[:500]
    
    val_results = {}
    val_results['oob_estimates'] = neg + pos
            
    # render the ROC at several x-axis zoom levels, saving each figure to a PNG buffer
    img_results = {}
    
    buf = io.BytesIO()    
    plt.xlim([0, 0.0001])
    plt.savefig(buf)
    img_results['img_0_0001'] = buf
    
    buf = io.BytesIO()    
    plt.xlim([0, 0.001])
    plt.savefig(buf)
    img_results['img_0_001'] = buf

    buf = io.BytesIO()    
    plt.xlim([0, 0.01])
    plt.savefig(buf)
    img_results['img_0_01'] = buf
   
    buf = io.BytesIO()    
    plt.xlim([0, 0.1])
    plt.savefig(buf)
    img_results['img_0_1'] = buf

    buf = io.BytesIO()    
    plt.xlim([0, 1])
    plt.savefig(buf)
    img_results['img_1'] = buf
    
    buf = io.BytesIO()    
    plt.xlim([1e-6, 1])
    plt.xscale('log')
    plt.savefig(buf)
    img_results['img_log'] = buf

    results = {}
    results['time'] = txt_time
    results['output'] = store
    results['validation'] = val_results
    results['images'] = img_results
    
    plt.close()

    return results
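# sketch of a call site (my assumption; names are illustrative, not from the
# original project): ``oob_estimates`` maps sample index -> out-of-fold
# predicted probability collected across the cv splits, e.g.
#   cv = list(StratifiedKFold(y, n_folds=10))
#   results = create_record(clf, y, cv, names, fprs, tprs, threshs, oobs)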
Exemple #49
0
 def lazy(*ap, **kp):
     A, K = a+ap, sortedtuple(k.items() + kp.items())
     return state[(A, K)] if (A, K) in state else state.setdefault((A, K), f(*A, **builtins.dict(k.items()+kp.items())))
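# note (an assumption on my part): this fragment is the inner closure of a
# memoizing ``lazy(f, *a, **k)`` wrapper like the one defined earlier in this
# listing; ``state``, ``f``, ``a`` and ``k`` are free variables bound by the
# enclosing definition, and ``sortedtuple`` is presumably that codebase's
# helper for turning keyword items into a hashable key.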
Exemple #50
0
def extract_feature_matrix(df_comments, df_thread_groupby):
    print "START"
    # Sentence Tokenizer
    sentencer = SentenceTokenizer()
    
    clf = load_classifier(sentiment_path + 'sentiment_classifier.pickle')
        
    featureMatrix = np.empty([df_comments.shape[0],25])
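    # column layout of the 25 features, as assigned below:
    #   0 timeliness           1 time since first comment    2 lengthiness
    #   3 raw length           4 verb freq                   5 noun freq
    #   6 pronoun freq         7 capitalized-token freq      8 '?' sentence freq
    #   9 '!' sentence freq   10 sentence-initial capitals  11 entropy
    #  12 lexical diversity   13 misspelt count             14 misspelt ratio
    #  15 swear count         16 swear ratio                17 mean term freq
    #  18 tf-idf              19 Flesch-Kincaid score       20 thread-topic overlap
    #  21 article-topic overlap   22 sentiment   23 subjectivity   24 polarity overlap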
    
    feature_dict = dict()
    for ix, row in df_comments.iterrows():
        feature_dict[row['comment_id']] = ix
    
    feature_count = 0
    
    for _,row in df_comments.iterrows():
        index = feature_dict[row['comment_id']]
        
        comm = row['comment_content'].decode('ASCII', 'ignore')
        tokens = words(comm)
        unique_tokens = set(tokens)
        sentences = sentencer.tokenize(comm)
        
        featureMatrix[index][3] =  len(comm)
        
        verb_fr, noun_fr, pronoun_fr = pos_freq(tokens)
        featureMatrix[index][4] = verb_fr
        featureMatrix[index][5] = noun_fr
        featureMatrix[index][6] = pronoun_fr
        
        featureMatrix[index][7] = capital_frequency(tokens)
        featureMatrix[index][8] = sent_frequency(sentences, '?')
        featureMatrix[index][9] = sent_frequency(sentences, '!')
        featureMatrix[index][10] = sentence_capital_frequency(sentences)
        
        featureMatrix[index][11] = entropy(comm)
        featureMatrix[index][12] = lexical_diversity(tokens)
        
        
        if len(tokens) == 0:
            featureMatrix[index][13] =  0
            featureMatrix[index][14] =  0
            featureMatrix[index][15] =  0
            featureMatrix[index][16] =  0
        else:
            spelt_wrong = missing_words(unique_tokens)
            bad_words_list = swears(unique_tokens)
            
            featureMatrix[index][13] =  len(spelt_wrong)
            featureMatrix[index][14] =  len(spelt_wrong)/float(len(unique_tokens))
            featureMatrix[index][15] =  len(bad_words_list)
            featureMatrix[index][16] =  len(bad_words_list)/float(len(unique_tokens))
            
            
        featureMatrix[index][19] =  F_K_score(sentences, tokens)
        
        testSet = dict()
        refWords = make_full_dict(tokens)
        testSet.update(refWords)
    
        probDist = clf.prob_classify(testSet)                
        sentiment = probDist.prob('pos')            
        subj_obj = get_subjectivity(probDist)
    
        polarity_overlap = get_polarity_overlap(words(row['article_body']), tokens, clf)
        featureMatrix[index][22] =  sentiment
        featureMatrix[index][23] =  subj_obj
        featureMatrix[index][24] =  polarity_overlap
        
        feature_count += 1
        if feature_count % 1000 == 0:
            print feature_count
    
    print "DONE"
    
    feature_count = 0
    # second pass: thread-level features that depend on the whole comment group
    for _,group in df_thread_groupby:
        thread_comments = [row['comment_content'] for _,row in group.iterrows()]
        
        # Get average time
        sumTime = 0 
        count = 0                
        previous = mktime(group.iloc[0]['date'])
        first = mktime(group.iloc[0]['date'])
        
        # Average length
        sumLen = 0 
        
        
        thread_tokens = []    
        
        # Within Thread
        for _, row in group.iterrows():
            index = feature_dict[row['comment_id']]
            comm = row['comment_content'].decode('ascii','ignore')
            tokens = words(comm)
            sentences = sentencer.tokenize(comm)
            
            # Ongoing average time
            sumTime += mktime(row['date']) - previous
            count += 1            
            avgTime = sumTime/float(count)
            
            # Ongoing average length
            sumLen += len(words(row['comment_content']))
            avgLen = sumLen/float(count)
            
            ######################################################################
            # Get chunked sentences
            for sent in sentences:
                sent_tokens = words(sent)
                sent_tokens_tagged = nltk.pos_tag(sent_tokens)
                chunks = nltk.ne_chunk(sent_tokens_tagged, binary=True)
                doc = [] 
                for chunk in chunks:
                    if type(chunk) == nltk.Tree:
                        doc.append(' '.join(c[0] for c in chunk.leaves()))
                    else:
                        doc.append(chunk[0])
                doc = [word.strip(string.punctuation) for word in doc if len(word.strip(string.punctuation)) > 1]
                
                # The cumulative tokens up to this point
                thread_tokens += doc
            
            ######################################################################
            article_tokens = []
            article_sentences = sentencer.tokenize(row['article_body'])
            for sent in article_sentences:
                sent_tokens = words(sent)
                sent_tokens_tagged = nltk.pos_tag(sent_tokens)
                chunks = nltk.ne_chunk(sent_tokens_tagged, binary=True)
                doc = []
                for chunk in chunks:
                    if type(chunk) == nltk.Tree:
                        doc.append(' '.join(c[0] for c in chunk.leaves()))
                    else:
                        doc.append(chunk[0])
                article_tokens += [word.strip(string.punctuation) for word in doc if len(word.strip(string.punctuation)) > 1]
            
            ######################################################################
            
            
            featureMatrix[index][0] = timeliness(mktime(row['date']), previous, max(avgTime, 1))
            previous = mktime(row['date'])        
            
            featureMatrix[index][1] =  mktime(row['date']) - first  
            
            featureMatrix[index][2] = lengthiness(words(row['comment_content']), max(avgLen, 1))  
            
            featureMatrix[index][17] =  np.mean([termf(comm.count(w), tokens) for w in set(tokens)])  
            featureMatrix[index][18] =  tf_idf(comm, thread_comments)     
            
            featureMatrix[index][20] =  onSubForumTopic(tokens, thread_tokens)
            featureMatrix[index][21] =  onSubForumTopic(tokens, article_tokens)
    
    
            feature_count += 1
            if feature_count % 1000 == 0:
                print feature_count
    
    return featureMatrix
Exemple #51
0
 def __init__(self, filename, empty_user = set()):
     '''
     filename: inits the UBRR data from the input file
     empty_user: skip the reviews by this user (keeps the ratings)
     '''
     self.empty_user = empty_user
     
     ur_map = dict()
     br_map = dict()
     
     cnt = 0
     skipped = 0
     
     #read the file
     if filename.endswith('.gz'):
         f = gzip.open(filename, 'r')
     else:
         f = open(filename, 'r')
     
     for line in f:
         vals = line.split("\t")
         if len(vals) < 4:
             # str.split() never returns an empty list, so guard on the field count instead
             continue
         
         u = vals[0]
         b = vals[1]
         r = float(vals[2])
         d = vals[3].strip()
         if u in self.empty_user:
             #we are skipping this review
             d = ''
             skipped += 1
         
         rev = Review(u, b, r, d)  #review obj
         
         
         #store biz -> list of reviews
         if b not in br_map:
             br_map[b] = []
         
         br_map[b].append(rev)
         
         #store user -> list of reviews
         if u not in ur_map:
             ur_map[u] = []
             
         ur_map[u].append(rev)
         
         cnt += 1
         
     
     self.biz_map = br_map
     self.user_map = ur_map
     
     
     f.close()
     print 'Review Data Manager initialized with', cnt, 'reviews'
     print 'Number of skipped users =', len(self.empty_user)
     print 'Number of skipped reviews =', skipped
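# usage sketch (my example; the class name is hypothetical since only
# __init__ is shown, and the tab-separated format is inferred from the
# parsing above: user <TAB> business <TAB> rating <TAB> review-text):
#   data = ReviewDataManager('reviews.tsv.gz', empty_user=set(['u123']))
#   data.user_map['u456']   # list of Review objects written by that user
#   data.biz_map['b789']    # list of Review objects for that business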