Ejemplo n.º 1
0
def checkPointMatchesFocal(dataLines):
    '''
    Finds "point" lines that do not belong to the focal sample in progress.

    Walks dataLines in order, remembering the most recent focal header seen.
    A "point" line is reported when either:
        1) no focal header has been seen yet, or
        2) sameActor() or sameDate() says the point disagrees with that most
           recent focal header.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of list of strings: the offending "point" lines, in the
    order they appear in dataLines.
    '''
    from constants import focalAbbrev, pntAbbrev

    currentHdr = []
    mismatches = []

    for row in dataLines:
        if isType(row, focalAbbrev):
            # Keep a copy of the header that later points are compared to
            currentHdr = row[:]
            continue
        if not isType(row, pntAbbrev):
            continue
        # A point is acceptable only when a header exists AND both the actor
        # and the date agree with it.
        headerSeen = currentHdr != []
        if headerSeen and sameActor(currentHdr, row) and sameDate(currentHdr, row):
            continue
        mismatches.append(row)

    return mismatches
Ejemplo n.º 2
0
def countPointsPerFocal(dataLines):
    '''
    Tallies how many "point" lines follow each focal header.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  They are also presumed to be in chronological
    order.

    Returns a list of [header, count] pairs, ordered by the tab-joined text of
    the header.  "header" is the focal header as a list of strings and "count"
    is an integer.  The placeholder entry ['NONE YET'] is always present; it
    holds the number of points seen before the first focal header.
    '''
    from constants import focalAbbrev, pntAbbrev

    placeholder = 'NONE YET'
    tallies = {placeholder: 0}
    currentKey = placeholder

    for row in dataLines:
        if isType(row, focalAbbrev):
            # Headers are stored as one tab-joined string so they can be keys
            currentKey = '\t'.join(row)
            tallies[currentKey] = 0
        elif isType(row, pntAbbrev):
            tallies[currentKey] = tallies[currentKey] + 1

    # Turn each key back into a list of strings, keeping the sorted order
    return [[key.split('\t'), total] for (key, total) in sorted(tallies.items())]
Ejemplo n.º 3
0
def checkNeighborsPerPoint(dataLines):
    '''
    Tallies how many neighbor lines follow each "point" line in dataLines.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of (point, number of neighbors) tuples sorted by point.
    The "point" is the point line's strings joined with tabs; the "number of
    neighbors" is an integer.  A 'NONE YET' entry is always included and
    counts neighbor lines seen before the first point.
    '''
    from constants import pntAbbrev, neighborAbbrev

    placeholder = 'NONE YET'
    neighborTally = {placeholder: 0}
    currentPoint = placeholder

    for row in dataLines:
        if isType(row, pntAbbrev):
            currentPoint = '\t'.join(row)
            neighborTally[currentPoint] = 0
            continue
        if isType(row, neighborAbbrev):
            neighborTally[currentPoint] += 1

    ordered = list(neighborTally.items())
    ordered.sort(key=lambda entry: entry[0])
    return ordered
Ejemplo n.º 4
0
def checkNeighborsPerPoint(dataLines):
    '''
    Tallies how many neighbor lines follow each "point" line in dataLines.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of (point, number of neighbors) tuples sorted by point.
    The "point" is the point line's strings joined with tabs; the "number of
    neighbors" is an integer.  A 'NONE YET' entry is always included and
    counts neighbor lines seen before the first point.
    '''
    from constants import pntAbbrev, neighborAbbrev

    placeholder = 'NONE YET'
    neighborTally = {placeholder: 0}
    currentPoint = placeholder

    for row in dataLines:
        if isType(row, pntAbbrev):
            currentPoint = '\t'.join(row)
            neighborTally[currentPoint] = 0
            continue
        if isType(row, neighborAbbrev):
            neighborTally[currentPoint] += 1

    ordered = list(neighborTally.items())
    ordered.sort(key=lambda entry: entry[0])
    return ordered
Ejemplo n.º 5
0
def checkActorIsActee(dataLines):
    '''
    Finds ad-lib and neighbor lines whose two individuals, at [5] and [7],
    are the same.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of lists of strings: the matching lines, in input order.
    '''
    from constants import adlibAbbrev, neighborAbbrev

    selfDirected = []

    for row in dataLines:
        # Only ad-lib and neighbor lines name two individuals
        if not (isType(row, adlibAbbrev) or isType(row, neighborAbbrev)):
            continue
        if row[5] == row[7]:
            selfDirected.append(row)

    return selfDirected
Ejemplo n.º 6
0
def checkActorIsActee(dataLines):
    '''
    Finds ad-lib and neighbor lines whose two individuals, at [5] and [7],
    are the same.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of lists of strings: the matching lines, in input order.
    '''
    from constants import adlibAbbrev, neighborAbbrev

    selfDirected = []

    for row in dataLines:
        # Only ad-lib and neighbor lines name two individuals
        if not (isType(row, adlibAbbrev) or isType(row, neighborAbbrev)):
            continue
        if row[5] == row[7]:
            selfDirected.append(row)

    return selfDirected
Ejemplo n.º 7
0
def checkDuplicateFocals(dataLines):
    '''
    Finds individuals who were the subject of more than one focal sample on
    the same day.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  The following are assumed true about the lists of
    strings:
        1) The line "type" is indicated by the string at [0]
        2) The date for event is at [2]
        3) For lines indicating a new focal sample, the focal's ID is at [5]

    Returns a list of (date, name) tuples (date and name are both strings),
    one per duplicated date/individual pair, sorted by date and then by name.
    If none found, returns an empty list.
    '''
    from collections import Counter
    from constants import focalAbbrev

    # Tally every (date, name) pair in a single pass.  Calling list.count()
    # once per focal would rescan the whole list each time.
    focalTally = Counter(
        (line[2], line[5]) for line in dataLines if isType(line, focalAbbrev)
    )

    # Sort on the whole tuple.  Sorting on the date alone would leave same-day
    # entries in set/dict iteration order, which can differ from run to run.
    return sorted(pair for (pair, timesSeen) in focalTally.items() if timesSeen > 1)
Ejemplo n.º 8
0
def countFocalTypes(dataLines):
    '''
    Tallies focal header lines by focal sample type (the string at [6]):
    adult female, juvenile, or "other" for anything else.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a dictionary mapping sample type to number of focal headers.  The
    adult female and juvenile keys are always present (possibly zero); the
    "other" key is present only when its count is above zero.
    '''
    from constants import focalAbbrev, stypeAdultFem, stypeJuv, stypeOther

    tally = {stypeAdultFem: 0, stypeJuv: 0, stypeOther: 0}
    knownTypes = (stypeAdultFem, stypeJuv)

    for row in dataLines:
        if not isType(row, focalAbbrev):
            continue
        bucket = row[6] if row[6] in knownTypes else stypeOther
        tally[bucket] += 1

    # "Other" SHOULD never happen, so it is only worth reporting when it did.
    if tally[stypeOther] == 0:
        del tally[stypeOther]

    return tally
Ejemplo n.º 9
0
def checkDuplicateFocals(dataLines):
    '''
    Finds individuals who were the subject of more than one focal sample on
    the same day.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  The following are assumed true about the lists of
    strings:
        1) The line "type" is indicated by the string at [0]
        2) The date for event is at [2]
        3) For lines indicating a new focal sample, the focal's ID is at [5]

    Returns a list of (date, name) tuples (date and name are both strings),
    one per duplicated date/individual pair, sorted by date and then by name.
    If none found, returns an empty list.
    '''
    from collections import Counter
    from constants import focalAbbrev

    # Tally every (date, name) pair in a single pass.  Calling list.count()
    # once per focal would rescan the whole list each time.
    focalTally = Counter(
        (line[2], line[5]) for line in dataLines if isType(line, focalAbbrev)
    )

    # Sort on the whole tuple.  Sorting on the date alone would leave same-day
    # entries in set/dict iteration order, which can differ from run to run.
    return sorted(pair for (pair, timesSeen) in focalTally.items() if timesSeen > 1)
Ejemplo n.º 10
0
def checkNeighborNotReal(dataLines):
    '''
    Finds neighbor lines where either name, at [5] or [7], is a known
    placeholder code (one of unnamedCodes) or is not exactly 3 characters
    long.

    Legitimate names are expected to be exactly 3 characters, so the length
    test catches placeholder-like values that were not listed beforehand.

    Unlike checkActorActeeNotReal, this uses only the smaller unnamedCodes
    set of placeholders; see Babase documentation for which values are
    allowed where.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of lists of strings: the flagged neighbor lines.
    '''
    from constants import unnamedCodes, neighborAbbrev

    knownPlaceholders = set(unnamedCodes)
    flagged = []

    for row in dataLines:
        if not isType(row, neighborAbbrev):
            continue
        # A row is clean only if neither name is a placeholder and both names
        # have the expected length.
        isClean = (row[5] not in knownPlaceholders
                   and row[7] not in knownPlaceholders
                   and len(row[5]) == 3
                   and len(row[7]) == 3)
        if not isClean:
            flagged.append(row)

    return flagged
Ejemplo n.º 11
0
def checkMountsConsortsInvolvedFocal(dataLines):
    '''
    Finds recorded mounts, ejaculations, and consorts that did NOT involve the
    focal individual as actor or actee.

    Checks both "note" lines and "ad-lib" lines for these behaviors.  The
    focal individual is the ID at [5] of the most recent focal header line.
    Lines with one of these behaviors that occur before any focal header are
    always reported.

    For "note" lines, the last string of the line is split on whitespace and
    is expected to read "actor act actee".  Notes that cannot be read that way
    (fewer than three words, or the second word is not one of the behaviors)
    are skipped, not reported.

    Name comparisons are case-insensitive for both notes and ad-libs.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. [0] in each list of strings is the "type" of data
    recorded in that line.

    Returns a list of list of strings: the lines where the focal individual
    was neither the actor nor the actee.
    '''
    from constants import focalAbbrev, noteAbbrev, adlibAbbrev, bb_consort, bb_mount, bb_ejaculation

    mountsEtc = [bb_consort, bb_mount, bb_ejaculation]
    outLines = []
    focalIndiv = '' # upper-cased ID of the current focal; '' means none yet

    for line in dataLines:
        if isType(line, focalAbbrev):
            focalIndiv = line[5].upper()

        elif isType(line, noteAbbrev) and behaviorsInNote(line, mountsEtc):
            if focalIndiv == '': # no focal yet
                outLines.append(line)
                continue
            interaction = line[-1].split() # SHOULD be [actor, act, actee]
            # The length check keeps short notes (e.g. just the behavior code)
            # from raising IndexError; they are skipped like any other note
            # that can't be parsed.
            if len(interaction) >= 3 and interaction[1].upper() in mountsEtc:
                actor = interaction[0].upper()
                actee = interaction[2].upper()
                if focalIndiv not in [actor, actee]:
                    outLines.append(line)

        elif isType(line, adlibAbbrev) and line[6] in mountsEtc:
            # Upper-case the names here too, since focalIndiv is upper-cased
            if focalIndiv == '' or focalIndiv not in [line[5].upper(), line[7].upper()]:
                outLines.append(line)

    return outLines
Ejemplo n.º 12
0
def checkMountsConsortsInvolvedFocal(dataLines):
    '''
    Finds recorded mounts, ejaculations, and consorts that did NOT involve the
    focal individual as actor or actee.

    Checks both "note" lines and "ad-lib" lines for these behaviors.  The
    focal individual is the ID at [5] of the most recent focal header line.
    Lines with one of these behaviors that occur before any focal header are
    always reported.

    For "note" lines, the last string of the line is split on whitespace and
    is expected to read "actor act actee".  Notes that cannot be read that way
    (fewer than three words, or the second word is not one of the behaviors)
    are skipped, not reported.

    Name comparisons are case-insensitive for both notes and ad-libs.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. [0] in each list of strings is the "type" of data
    recorded in that line.

    Returns a list of list of strings: the lines where the focal individual
    was neither the actor nor the actee.
    '''
    from constants import focalAbbrev, noteAbbrev, adlibAbbrev, bb_consort, bb_mount, bb_ejaculation

    mountsEtc = [bb_consort, bb_mount, bb_ejaculation]
    outLines = []
    focalIndiv = '' # upper-cased ID of the current focal; '' means none yet

    for line in dataLines:
        if isType(line, focalAbbrev):
            focalIndiv = line[5].upper()

        elif isType(line, noteAbbrev) and behaviorsInNote(line, mountsEtc):
            if focalIndiv == '': # no focal yet
                outLines.append(line)
                continue
            interaction = line[-1].split() # SHOULD be [actor, act, actee]
            # The length check keeps short notes (e.g. just the behavior code)
            # from raising IndexError; they are skipped like any other note
            # that can't be parsed.
            if len(interaction) >= 3 and interaction[1].upper() in mountsEtc:
                actor = interaction[0].upper()
                actee = interaction[2].upper()
                if focalIndiv not in [actor, actee]:
                    outLines.append(line)

        elif isType(line, adlibAbbrev) and line[6] in mountsEtc:
            # Upper-case the names here too, since focalIndiv is upper-cased
            if focalIndiv == '' or focalIndiv not in [line[5].upper(), line[7].upper()]:
                outLines.append(line)

    return outLines
Ejemplo n.º 13
0
def checkMountsConsortsDuringFocal(dataLines):
    '''
    Finds mounts, ejaculations, and consorts that were recorded outside of a
    focal sample.

    Checks both "note" lines and "ad-lib" lines for these behaviors.  Each
    focal header supplies a timestamp, parsed from its strings at [2] and [7]
    as 'YYYY-MM-DD HH:MM:SS'.  A line with one of these behaviors is reported
    when no focal header has been seen yet, or when duringFocal() says the
    line is not within the focal identified by that timestamp.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. [0] in each list of strings is the "type" of data
    recorded in that line.

    Returns a list of list of strings: the lines outside a focal sample.
    '''
    from constants import focalAbbrev, noteAbbrev, adlibAbbrev, bb_consort, bb_mount, bb_ejaculation
    from datetime import datetime

    behaviorsOfInterest = [bb_consort, bb_mount, bb_ejaculation]
    strays = []
    currentHdr = []
    hdrStamp = ''

    for row in dataLines:
        if isType(row, focalAbbrev):
            currentHdr = row[:]
            stampText = ' '.join((currentHdr[2], currentHdr[7]))
            hdrStamp = datetime.strptime(stampText, '%Y-%m-%d %H:%M:%S')
            continue

        # Notes are checked first, then ad-libs; everything else is ignored
        inNote = isType(row, noteAbbrev) and behaviorsInNote(row, behaviorsOfInterest)
        if not (inNote or (isType(row, adlibAbbrev) and row[6] in behaviorsOfInterest)):
            continue

        # Report when there is no focal yet, or the line falls outside it
        if len(currentHdr) == 0 or not duringFocal(row, hdrStamp):
            strays.append(row)

    return strays
Ejemplo n.º 14
0
def checkMountsConsortsDuringFocal(dataLines):
    '''
    Finds mounts, ejaculations, and consorts that were recorded outside of a
    focal sample.

    Checks both "note" lines and "ad-lib" lines for these behaviors.  Each
    focal header supplies a timestamp, parsed from its strings at [2] and [7]
    as 'YYYY-MM-DD HH:MM:SS'.  A line with one of these behaviors is reported
    when no focal header has been seen yet, or when duringFocal() says the
    line is not within the focal identified by that timestamp.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. [0] in each list of strings is the "type" of data
    recorded in that line.

    Returns a list of list of strings: the lines outside a focal sample.
    '''
    from constants import focalAbbrev, noteAbbrev, adlibAbbrev, bb_consort, bb_mount, bb_ejaculation
    from datetime import datetime

    behaviorsOfInterest = [bb_consort, bb_mount, bb_ejaculation]
    strays = []
    currentHdr = []
    hdrStamp = ''

    for row in dataLines:
        if isType(row, focalAbbrev):
            currentHdr = row[:]
            stampText = ' '.join((currentHdr[2], currentHdr[7]))
            hdrStamp = datetime.strptime(stampText, '%Y-%m-%d %H:%M:%S')
            continue

        # Notes are checked first, then ad-libs; everything else is ignored
        inNote = isType(row, noteAbbrev) and behaviorsInNote(row, behaviorsOfInterest)
        if not (inNote or (isType(row, adlibAbbrev) and row[6] in behaviorsOfInterest)):
            continue

        # Report when there is no focal yet, or the line falls outside it
        if len(currentHdr) == 0 or not duringFocal(row, hdrStamp):
            strays.append(row)

    return strays
Ejemplo n.º 15
0
def checkFocalInfantStatus(dataLines, moms):
    '''
    Finds focal "point" lines whose recorded infant status disagrees with
    what hasInfant() reports for the same line, given "moms".

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  The following are assumed true about the lists
    of strings:
        1) The line "type" is indicated by the string at [0]
        2) The date and time of the event are at [2] and [3] respectively
        3) In point lines, the focal individual's ID is at [5] and their
            activity/position/etc. are in [6].
        4) The string at [6] says something about an infant only when it has
            at least 4 characters.  If character #3 is pntActNoInfant, the
            point says she has no infant; anything else says she has one.

    Points whose [6] is shorter than 4 characters are ignored.  For the rest,
    the point's claim is compared with hasInfant(point, moms).  Each
    disagreeing row is copied and given one extra string at the end showing
    the status according to "moms": "(HAS INFANT)" or "(NO INFANT)".

    Returns a list of lists of strings, the aforementioned rows. If no rows
    found with this discrepancy, returns an empty list.
    '''
    from constants import pntAbbrev, pntActNoInfant

    # Text describing whether "moms" says the focal has an infant
    statusNote = {True: '(HAS INFANT)', False: '(NO INFANT)'}

    pointRows = [row for row in dataLines if isType(row, pntAbbrev)]
    discrepancies = []

    for row in pointRows:
        activity = row[6]
        if len(activity) >= 4:
            # What the point itself claims, versus what the other data says
            recordedInfant = activity[2] != pntActNoInfant
            expectedInfant = hasInfant(row, moms)
            if recordedInfant != expectedInfant:
                annotated = row[:]
                annotated.append(statusNote[expectedInfant])
                discrepancies.append(annotated)

    return discrepancies
Ejemplo n.º 16
0
def checkNotesNoFocals(dataLines):
    '''
    Finds "note" lines dated (at [2]) on days for which dataLines contains no
    focal header.  The original author notes that this matters because such
    notes will not be recorded in Babase.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of list of strings: the "note" lines on days with no focals.
    '''
    from constants import focalAbbrev, noteAbbrev

    datesWithFocals = set()
    allNotes = []

    # Collect every focal date first; a note may precede its day's focal.
    for row in dataLines:
        if isType(row, focalAbbrev):
            datesWithFocals.add(row[2])
            continue
        if isType(row, noteAbbrev):
            allNotes.append(row)

    orphans = []
    for note in allNotes:
        if note[2] in datesWithFocals:
            continue
        orphans.append(note)

    return orphans
Ejemplo n.º 17
0
def checkNotesNoFocals(dataLines):
    '''
    Finds "note" lines dated (at [2]) on days for which dataLines contains no
    focal header.  The original author notes that this matters because such
    notes will not be recorded in Babase.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of list of strings: the "note" lines on days with no focals.
    '''
    from constants import focalAbbrev, noteAbbrev

    datesWithFocals = set()
    allNotes = []

    # Collect every focal date first; a note may precede its day's focal.
    for row in dataLines:
        if isType(row, focalAbbrev):
            datesWithFocals.add(row[2])
            continue
        if isType(row, noteAbbrev):
            allNotes.append(row)

    orphans = []
    for note in allNotes:
        if note[2] in datesWithFocals:
            continue
        orphans.append(note)

    return orphans
Ejemplo n.º 18
0
def checkBehavsInNotes(dataLines, criteriaBehavs):
    '''
    Finds "note" lines for which behaviorsInNote() reports that any of the
    behaviors in criteriaBehavs occur.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of lists of strings: the matching notes, in input order.
    '''
    from constants import noteAbbrev

    matches = []

    for row in dataLines:
        if isType(row, noteAbbrev) and behaviorsInNote(row, criteriaBehavs):
            matches.append(row)

    return matches
Ejemplo n.º 19
0
def getPointsPerFocal(dataLines):
    '''
    Groups "point" lines under the focal header that most recently preceded
    them.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  They are also presumed to be in chronological
    order.

    Returns a dictionary.  Each key is a focal header joined into a single
    tab-delimited string; each value is the list of point lines (each a list
    of strings) that followed that header.  The key 'NONE YET' is always
    present and holds any points seen before the first focal header.
    '''
    from constants import focalAbbrev, pntAbbrev

    # "bucket" is always the list belonging to the focal in progress
    bucket = []
    pointsByFocal = {'NONE YET': bucket}

    for row in dataLines:
        if isType(row, focalAbbrev):
            bucket = []
            pointsByFocal['\t'.join(row)] = bucket
        elif isType(row, pntAbbrev):
            bucket.append(row)

    return pointsByFocal
Ejemplo n.º 20
0
def checkBehavsInNotes(dataLines, criteriaBehavs):
    '''
    Finds "note" lines for which behaviorsInNote() reports that any of the
    behaviors in criteriaBehavs occur.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of lists of strings: the matching notes, in input order.
    '''
    from constants import noteAbbrev

    matches = []

    for row in dataLines:
        if isType(row, noteAbbrev) and behaviorsInNote(row, criteriaBehavs):
            matches.append(row)

    return matches
Ejemplo n.º 21
0
def countLines(dataLines, sampleType=''):
    '''
    Counts the lines in dataLines that are of "type" sampleType.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. sampleType is a string indicating which "type" of
    data to count.  If sampleType is not provided or is the empty string,
    every line is counted.

    Returns an integer, the number of lines.
    '''

    if sampleType != '':
        return sum(1 for row in dataLines if isType(row, sampleType))

    # No type requested, so every line counts
    return len(dataLines)
Ejemplo n.º 22
0
def countLines(dataLines, sampleType=''):
    '''
    Counts the lines in dataLines that are of "type" sampleType.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. sampleType is a string indicating which "type" of
    data to count.  If sampleType is not provided or is the empty string,
    every line is counted.

    Returns an integer, the number of lines.
    '''

    if sampleType != '':
        return sum(1 for row in dataLines if isType(row, sampleType))

    # No type requested, so every line counts
    return len(dataLines)
Ejemplo n.º 23
0
def checkInvalidFocalTypes(dataLines):
    '''
    Finds focal header lines whose sample type, at [6], is neither adult
    female nor juvenile.  These will occur, for example, if an observer
    accidentally starts a focal on an adult male.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.

    Returns a list of lists of strings: the focal headers with invalid types.
    '''
    from constants import focalAbbrev, stypeAdultFem, stypeJuv

    validTypes = [stypeAdultFem, stypeJuv]
    rejected = []

    for row in dataLines:
        if isType(row, focalAbbrev) and row[6] not in validTypes:
            rejected.append(row)

    return rejected
Ejemplo n.º 24
0
def pointsOutOfSight(dataLines):
    '''
    Gathers the "point" lines whose activity, at [6], equals outOfSightValue.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  [0] in each list of strings is the "type" of data
    recorded in that line. In "point" lines, [6] is the point's "activity", or
    "out of sight".

    Returns a list of lists of strings: all the "out of sight" lines.
    '''
    from constants import pntAbbrev, outOfSightValue

    hidden = []

    for row in dataLines:
        if not isType(row, pntAbbrev):
            continue
        if row[6] == outOfSightValue:
            hidden.append(row)

    return hidden
Ejemplo n.º 25
0
def pointsOutOfSight(dataLines):
    '''
    Gathers the "point" lines whose activity, at [6], equals outOfSightValue.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  [0] in each list of strings is the "type" of data
    recorded in that line. In "point" lines, [6] is the point's "activity", or
    "out of sight".

    Returns a list of lists of strings: all the "out of sight" lines.
    '''
    from constants import pntAbbrev, outOfSightValue

    hidden = []

    for row in dataLines:
        if not isType(row, pntAbbrev):
            continue
        if row[6] == outOfSightValue:
            hidden.append(row)

    return hidden
Ejemplo n.º 26
0
def kenyaLinesPerDay(dataLines, sampleType='', typeName=''):
    '''
    Reports how many lines of "type" sampleType were collected on each date
    (the string at [2]) found in dataLines.  Like the "countLinesPerDay"
    function in errorCheckingHelpers, but formatted for our Kenyan observers:
    each date is passed through kenyaDateTime(date, False) before output.

    If sampleType is the empty string, all lines are counted.  Dates that
    have no lines of the requested type are still listed, with a count of 0.

    typeName is an optional more-common word/phrase to use in the heading
    instead of sampleType, e.g. if sampleType = focalAbbrev, typeName might
    be "Focal Sample".  If both are empty the heading uses 'Line'.

    Returns a single string: a heading line followed by one tab-indented
    line per date, in sorted order of the original date strings.
    '''
    # Seed every date in the data with zero BEFORE filtering by type, so that
    # dates with none of the requested lines still show up.
    perDate = {}
    for row in dataLines:
        perDate.setdefault(row[2], 0)

    if sampleType == '':
        counted = dataLines[:]
    else:
        counted = [row for row in dataLines if isType(row, sampleType)]

    for row in counted:
        perDate[row[2]] += 1

    # Prefer the friendly name, then the raw type, then a generic word
    label = typeName or sampleType or 'Line'

    report = [label + 's Collected Per Day:']
    for day in sorted(perDate):
        report.append('\t' + kenyaDateTime(day, False) + ':\t' + str(perDate[day]))

    return '\n'.join(report)
Ejemplo n.º 27
0
def countLinesPerDay(dataLines, sampleType=''):
    '''
    Reports how many lines of "type" sampleType occurred on each date found
    in dataLines.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. sampleType is a string indicating which "type" of
    data to count.  Each line's date should be at [2].  If sampleType is not
    provided or is the empty string, all lines are counted.

    Every distinct date in dataLines is listed, including dates with no lines
    of the requested type (count 0).

    Returns a single string: a heading line followed by one tab-indented line
    per date, sorted by date string, all joined with line breaks.
    '''

    # Seed every date in the data with zero BEFORE filtering by type, so that
    # dates with none of the requested lines still show up.
    perDate = {}
    for row in dataLines:
        perDate.setdefault(row[2], 0)

    if sampleType == '':
        counted = dataLines[:]
    else:
        counted = [row for row in dataLines if isType(row, sampleType)]

    for row in counted:
        perDate[row[2]] += 1

    report = [sampleType + ' Lines Collected Per Day:']
    for day in sorted(perDate):
        report.append('\t' + day + ':\t' + str(perDate[day]))

    return '\n'.join(report)
Ejemplo n.º 28
0
def countLinesPerDay(dataLines, sampleType=''):
    '''
    Reports how many lines of "type" sampleType occurred on each date found
    in dataLines.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split. sampleType is a string indicating which "type" of
    data to count.  Each line's date should be at [2].  If sampleType is not
    provided or is the empty string, all lines are counted.

    Every distinct date in dataLines is listed, including dates with no lines
    of the requested type (count 0).

    Returns a single string: a heading line followed by one tab-indented line
    per date, sorted by date string, all joined with line breaks.
    '''

    # Seed every date in the data with zero BEFORE filtering by type, so that
    # dates with none of the requested lines still show up.
    perDate = {}
    for row in dataLines:
        perDate.setdefault(row[2], 0)

    if sampleType == '':
        counted = dataLines[:]
    else:
        counted = [row for row in dataLines if isType(row, sampleType)]

    for row in counted:
        perDate[row[2]] += 1

    report = [sampleType + ' Lines Collected Per Day:']
    for day in sorted(perDate):
        report.append('\t' + day + ':\t' + str(perDate[day]))

    return '\n'.join(report)
Ejemplo n.º 29
0
def checkFocalOverlaps(dataLines):
    '''
    Pairs every focal "header" line in dataLines with each of the lines that
    findOverlaps reports for it.
    
    dataLines is a list of lists of strings, presumed to be all the data from
    a file, stripped and split, with the line "type" at [0].
    
    Returns a list of (focal1, focal2) tuples.  focal1 is a header line and
    focal2 is one of the items findOverlaps returned for focal1 (presumably a
    header whose sample overlaps focal1 in time -- confirm against
    findOverlaps).  One tuple is produced per returned item, grouped by focal1
    in data order.
    '''
    
    from constants import focalAbbrev
    
    headers = [row for row in dataLines if isType(row, focalAbbrev)]
    
    # Every header is checked against the complete list of headers.
    return [(header, other)
            for header in headers
            for other in findOverlaps(header, headers)]
Ejemplo n.º 30
0
def checkActorActeeNotReal(dataLines):
    '''
    Finds ad-lib lines in dataLines whose actor or actee (read from [5] and
    [7]) is a placeholder value such as "NULL" rather than a real name.
    
    The placeholder set used here is the union of the keys of
    constants.unknSnames and the values in constants.unnamedCodes.  This is a
    larger set than the one checkNeighborNotReal uses, because some of these
    values are acceptable as neighbors.  See Babase documentation.
    
    dataLines is a list of lists of strings, presumed to be all the data from
    a file, stripped and split.
    
    Returns a list of lists of strings: the ad-lib lines where either position
    holds a placeholder, in data order.
    '''
    from constants import unknSnames, unnamedCodes, adlibAbbrev
    
    # Gather every known placeholder code into one set for quick lookups.
    placeholders = set(unknSnames.keys())
    placeholders.update(unnamedCodes)
    
    adlibs = [row for row in dataLines if isType(row, adlibAbbrev)]
    
    flagged = []
    for row in adlibs:
        if row[5] in placeholders or row[7] in placeholders:
            flagged.append(row)
    
    return flagged
Ejemplo n.º 31
0
def checkNeighborNotReal(dataLines):
    '''
    Finds neighbor lines in dataLines where the value at [5] or [7] is one of
    the placeholder codes in constants.unnamedCodes (for example "INF", a
    not-yet-named infant).
    
    The placeholder set used here is smaller than the one
    checkActorActeeNotReal uses: some values that are allowed as neighbors are
    not allowed in ad-libs.  See Babase documentation.
    
    dataLines is a list of lists of strings, presumed to be all the data from
    a file, stripped and split.
    
    Returns a list of lists of strings: the neighbor lines where either
    position holds a placeholder, in data order.
    '''
    from constants import unnamedCodes, neighborAbbrev
    
    placeholders = set(unnamedCodes)
    
    def usesPlaceholder(row):
        # True when either checked position holds a placeholder code.
        return row[5] in placeholders or row[7] in placeholders
    
    neighborRows = [row for row in dataLines if isType(row, neighborAbbrev)]
    
    return [row for row in neighborRows if usesPlaceholder(row)]
Ejemplo n.º 32
0
def checkFocalOverlaps(dataLines):
    '''
    Collects (focal1, focal2) pairs of focal "header" lines, as reported by
    findOverlaps.
    
    dataLines is a list of lists of strings, presumed to be all the data from
    a file, stripped and split.  [0] in each list of strings is the "type" of
    data recorded in that line.
    
    Each header line in dataLines is handed to findOverlaps together with the
    full list of headers.  Every item that call returns is paired with the
    header it was found for (presumably these are headers overlapping it in
    time -- confirm against findOverlaps).
    
    Returns a list of (focal1, focal2) tuples; both members are lists of
    strings.
    '''
    
    from constants import focalAbbrev
    
    headers = [row for row in dataLines if isType(row, focalAbbrev)]
    
    pairs = []
    for header in headers:
        pairs.extend((header, other) for other in findOverlaps(header, headers))
    
    return pairs
Ejemplo n.º 33
0
def checkDuplicateGroups(dataLines):
    '''
    Reports the dates on which focal samples were started in more than one
    group.
    
    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  The following are assumed true about the lists
    of strings:
        1) The line "type" is indicated by the string at [0]
        2) The date for event is at [2]
        3) For lines indicating a new focal sample, the focal group is at [4]
    
    Returns a list of (date, groups) tuples, sorted by date.  date is a
    string; groups is the str() of the sorted list of distinct groups sampled
    on that date.  Only dates with more than one group are included.  If none
    are found, returns an empty list.
    '''
    from constants import focalAbbrev
    
    # Distinct (date, group) pairs taken from every focal header line.
    focalInfoSet = set((line[2], line[4]) for line in dataLines
                       if isType(line, focalAbbrev))
    
    # Dictionary of dates (keys) and list of group(s) (values) sampled on
    # those dates.
    datesGroups = {}
    for (focalDate, focalGrp) in focalInfoSet:
        datesGroups.setdefault(focalDate, []).append(focalGrp)
    
    # The groups were gathered by iterating a set, whose order is arbitrary,
    # so sort them before stringifying to keep the report reproducible.
    return [(focalDate, str(sorted(groups)))
            for (focalDate, groups)
            in sorted(datesGroups.items(), key = lambda pair: pair[0])
            if len(groups) > 1]
Ejemplo n.º 34
0
def checkDuplicateGroups(dataLines):
    '''
    Checks for days on which more than one group was sampled.
    
    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  The following are assumed true about the lists
    of strings:
        1) The line "type" is indicated by the string at [0]
        2) The date for event is at [2]
        3) For lines indicating a new focal sample, the focal group is at [4]
    
    Gathers all the lines indicating the beginning of new focal samples and
    groups their distinct focal groups by date.
    
    Returns a list of (date, groups) tuples (date and groups are both
    strings), sorted by date, listing every date that has more than one group.
    groups is the str() of that date's groups in sorted order.  If none are
    found, returns an empty list.
    '''
    from constants import focalAbbrev
    
    # Make set of (date, group) tuples from all the focal header lines
    focalInfoSet = set((line[2], line[4]) for line in dataLines
                       if isType(line, focalAbbrev))
    
    # Dictionary of dates (keys) and list of group(s) (values) sampled on
    # those dates
    datesGroups = {}
    for (focalDate, focalGrp) in focalInfoSet:
        datesGroups.setdefault(focalDate, []).append(focalGrp)
    
    duplicateFocals = []
    
    for focalDate in sorted(datesGroups):  # Sort by date
        groups = datesGroups[focalDate]
        if len(groups) > 1:
            # Set iteration order is arbitrary, so sort the groups to make the
            # reported string the same on every run.
            duplicateFocals.append((focalDate, str(sorted(groups))))
    
    return duplicateFocals
Ejemplo n.º 35
0
def importDataForCompares(dataLines):
    '''
    dataLines is a list of lists of strings: the data from a processed
    prim8 data file, stripped and split.
    
    Collects all "header" rows (beginning rows for a focal sample)
    into a list.  A "Y" or "N" is added to the end of each of these,
    indicating if the focal was completed.
    
        "N" is for samples that had no data, no points, or only out-
            of-sight points.
        "Y" is used for all others.
    
    After adding Y/N to each sample's header, some values in these
    rows are removed, while others are rearranged, as follows:
    
    Before (tab-delimited):
    HDR  OBS  yyyy-mm-dd  HH:MM:SS  GGG  NNN  TTT  hh:mm:ss  Y/N
    
        HDR is the code used to indicate that this is a header.
        OBS is the observer's initials.
        yyyy-mm-dd is the date.
        HH:MM:SS is the start time
        GGG is the group's abbreviation
        NNN is the focal individual's sname
        TTT is the focal sample type
        hh:mm:ss is the end time
        Y/N is the one-character code added, described above
    
    After:
    HH:MM:SS  yyyy-mm-dd  GGG  OBS  NNN  Y/N  FULLTEXT  NumPoints  NumIS
    
        (Same abbreviations as above, except...)
        FULLTEXT is the full text of the line in "before" (without the
            Y/N value), tab-joined as a single string. Useful for
            output.
        NumPoints is the count reported by countPointsPerFocal for
            this sample, as a string. Points that occurred during this
            time but with a different focal individual are NOT
            included in this count.
        NumIS is the count reported by countPointsPerFocal with
            incOutOfSights=False, as a string: the points recorded
            "in sight" (i.e. not out of sight).
        
        This order is used because it's similar to that used in the
        log file.  It makes comparisons easier to write.
    
    Returns a list of lists of strings, one list for each focal
    sample, in the "After" format shown above.
    
    NOTE(review): the counts are looked up by FULLTEXT, so this
    assumes countPointsPerFocal keys its results by the tab-joined
    header text -- confirm against that function.
    '''
    # List of all the header rows, i.e. the focal samples.  These are
    # copies, so appending Y/N below leaves dataLines untouched.
    theFocals = [line[:] for line in dataLines if isType(line, focalAbbrev)]

    # Find the focals that were not "completed": any samples with no
    # points, or only out-of-sight points.  If we first remove all the
    # out-of-sight points, then gathering samples with no points will
    # include all of these at once.
    #
    # The out-of-sight points are computed once here; they do not
    # depend on the line being tested.
    outOfSight = pointsOutOfSight(dataLines)
    inSightData = [line for line in dataLines if line not in outOfSight]

    # Header rows with no points (or only OOS points)
    incompletes = theseWithoutThose(inSightData, focalAbbrev, [pntAbbrev])

    # Add a "Y" or "N" to each row in theFocals to indicate how
    # "complete" the sample was
    for line in theFocals:
        line.append("N" if line in incompletes else "Y")

    # Points per focal, and in-sight points per focal, as dictionaries
    # to simplify the lookups below
    pointsPerFocal = dict(countPointsPerFocal(dataLines))
    pointsISPerFocal = dict(
        countPointsPerFocal(dataLines, incOutOfSights=False)
    )

    # Now rearrange the columns into their desired output format
    outData = []
    for line in theFocals:
        fullText = "\t".join(line[:-1])  # Omit the "complete" value
        outData.append([
            line[3],  # Time
            line[2],  # Date
            line[4],  # Grp
            line[1],  # Observer
            line[5],  # Sname
            line[-1],  # Complete
            fullText,  # Fulltext
            str(pointsPerFocal[fullText]),  # NumPoints
            str(pointsISPerFocal[fullText]),  # NumIS
        ])

    return outData
Ejemplo n.º 36
0
def checkUniqueNeighbors(dataLines, sampleProtocols):
    '''
    Looks for points, recorded during focal samples whose protocol (read from
    the header's [6]) is in sampleProtocols, that list the same neighbor name
    more than once.  Ideally, only check juvenile samples because the adult
    female protocol allows for some redundancy.
    
    Neighbor names are read from [7] of each neighbor line.  Placeholder names
    (any keys of constants.unknSnames) are ignored when judging uniqueness.
    
    If a point has more than three neighbors, the neighbors are likely not
    unique, but neighbor uniqueness really isn't _the_ problem.  Those cases
    are not returned by this function.
    
    dataLines is a list of lists of strings, presumed to be all the data from
    a file, stripped and split.  sampleProtocols is a list of strings.
    
    Returns a list of lists of strings: each offending point line followed by
    all of its neighbor lines.  An empty list separates one offending point
    from the next.  Points are reported in sorted order of their tab-joined
    text.  Neighbor lines that had no preceding point are gathered under the
    pseudo-point "(MISSING POINT LINE)".
    '''
    from constants import unknSnames, focalAbbrev, pntAbbrev, neighborAbbrev
    
    relevantTypes = [focalAbbrev, pntAbbrev, neighborAbbrev]
    
    # Key for neighbor lines that are missing a point line
    orphanKey = '(MISSING POINT LINE)'
    
    # Maps each point line (tab-joined into one string) to the list of its
    # neighbor lines (as lists of strings).
    neighborsByPoint = {orphanKey: []}
    
    # Header of the focal in progress; empty unless its protocol is wanted.
    activeHeader = []
    # Most recent point accepted within that focal; empty if none yet.
    activePoint = []
    
    for row in dataLines:
        if row[0] not in relevantTypes:
            # Not a header, point or neighbor: irrelevant to this question.
            continue
        
        if isType(row, focalAbbrev):
            # A new focal begins.  Remember it only if its protocol is wanted;
            # either way, any previous point no longer applies.
            activeHeader = row[:] if row[6] in sampleProtocols else []
            activePoint = []
            continue
        
        if activeHeader == []:
            # Point or neighbor outside of a focal of interest.
            continue
        
        if isType(row, pntAbbrev):
            # The dates should always agree; checked just in case.
            if sameDate(row, activeHeader):
                activePoint = row[:]
                neighborsByPoint['\t'.join(activePoint)] = []
        elif isType(row, neighborAbbrev):
            if activePoint == []:
                # Should only happen if the observer erred somewhere else.
                neighborsByPoint[orphanKey].append(row)
            elif sameDate(row, activePoint):
                neighborsByPoint['\t'.join(activePoint)].append(row)
    
    # Names that are allowed to be repeated
    placeholderNames = unknSnames.keys()
    
    flagged = []
    
    for pointText in sorted(neighborsByPoint):
        itsNeighbors = neighborsByPoint[pointText]
        if len(itsNeighbors) > 3:
            # Too many neighbors: this point has bigger issues than
            # uniqueness, so it is left for another check.
            continue
        
        realNames = [ngh[7] for ngh in itsNeighbors
                     if ngh[7] not in placeholderNames]
        if len(realNames) == len(set(realNames)):
            continue  # every real neighbor name is distinct
        
        if flagged:
            # Separator before every instance after the first; when this is
            # output to file, a newline will be added.
            flagged.append([])
        flagged.append(pointText.split('\t'))
        flagged.extend(itsNeighbors)

    return flagged
Ejemplo n.º 37
0
def theseWithoutThose(dataLines, thisType, notThose, butYesThem = [], beforeThem = []):
    '''
    Checks the data in dataLines and collects lines of "type" thisType that
    don't have any notThose lines after them before the next thisType line.
    For example, in the data below:
        1 HDR datadatadatadata
        2 PNT datadatadatadata
        3 HDR datadatadatadata
        4 HDR datadatadatadata
        5 PNT datadatadatadata
        6 NGH datadatadatadata
    
        theseWithoutThose(theData, HDR, [PNT]) would not return line 1 or 4
        because a PNT line occurs before the next HDR or before the end of data.
        Only line 3 would be returned.
        
        theseWithoutThose(theData, PNT, [NGH]) would return line 2 but not 5.
        
    If the optional "butYesThem" is given, then also add the qualification that
    at least one line of at least one type in butYesThem must also occur before
    the next "thisType" line. For example given "theData" above:
        
        theseWithoutThose(theData, HDR, [PNT], [NGH]) would return no lines.
        
        theseWithoutThose(theData, HDR, [NGH], [PNT]) would return only line 1.
    
    If the optional "beforeThem" is given, then add the qualification that there
    also mustn't be any "notThose" lines before lines of the type(s) included in
    "beforeThem".  For example, given "theData" below (slightly different from above):
        1 HDR datadatadatadata
        2 PNT datadatadatadata
        3 HDR datadatadatadata
        4 NGH datadatadatadata
        5 PNT datadatadatadata
        6 NGH datadatadatadata
        7 HDR datadatadatadata
    
        theseWithoutThose(theData, 'PNT', ['NGH'], beforeThem = ['HDR']) would
        return line 2
        
        theseWithoutThose(theData, 'PNT', ['NGH']) would return
        nothing
        
        theseWithoutThose(theData, 'HDR', ['PNT'], ['NGH'],['PNT']) would return
        line 3 (it's essentially asking for HDR with NGH before any PNT)
    
    
    dataLines is a list of lists of strings, presumed to be all the data from a
    file, stripped and split. [0] in each list of strings is the "type" of data
    recorded in that line.
    
    thisType is a string, indicating which line types to check for. notThose
    is a list of strings indicating which line types can't follow thisType
    lines. butYesThem and beforeThem are also lists of strings; neither is
    modified here.
    
    thisType and any items in butYesThem cannot be in notThose.  If they are,
    a one-item list holding an "ERROR (...)" message string is returned
    instead of data.
    
    Returns a list of lists of strings (copies of the qualifying lines).
    '''
    
    if thisType in notThose:
        return ['ERROR (' + thisType + '): Cannot exclude an item type from itself']
    for this in butYesThem:
        if this in notThose:
            return ['ERROR (' + this + '): Cannot require and forbid an item type']
    
    these = []
    maybeThis = []    # current candidate line; empty when there is none
    yesFound = False  # has a butYesThem line followed the candidate?
    
    checkYes = len(butYesThem) > 0
    
    for line in dataLines:
        if maybeThis == []:
            # No candidate yet: only a thisType line is of interest.
            if isType(line, thisType):
                maybeThis = line[:]
                yesFound = False
        elif checkYes:
            # When checkYes, notThose can be in beforeThem, so check
            # beforeThem first.
            if line[0] in beforeThem:
                if yesFound: # Hooray!
                    these.append(maybeThis)
                maybeThis = []
                yesFound = False
            elif line[0] in notThose: # candidate is not "without those"
                maybeThis = []
                yesFound = False
            elif line[0] in butYesThem: # found a "butYesThem"
                yesFound = True
            elif isType(line, thisType):
                if yesFound: # Winner!
                    these.append(maybeThis)
                # The next thisType line ends the old candidate's window and
                # becomes the new candidate.  Without this reset the old
                # candidate would be reported again and the new line would
                # never be considered.
                maybeThis = line[:]
                yesFound = False
        else:
            if line[0] in notThose: # candidate is not "without those"
                maybeThis = []
            else:
                startsNext = isType(line, thisType)
                if startsNext or line[0] in beforeThem:
                    # Candidate reached its end without any of "notThose".
                    these.append(maybeThis)
                    maybeThis = line[:] if startsNext else []
    
    # A candidate that survives to the end of the data qualifies, unless a
    # butYesThem line was required and never found.
    if maybeThis != [] and (yesFound or not checkYes):
        these.append(maybeThis)
    
    return these
Ejemplo n.º 38
0
def theseWithoutThose(dataLines, thisType, notThose, butYesThem = [], beforeThem = []):
    '''
    Checks the data in dataLines and collects lines of "type" thisType that
    don't have any notThose lines after them before the next thisType line.
    For example, in the data below:
        1 HDR datadatadatadata
        2 PNT datadatadatadata
        3 HDR datadatadatadata
        4 HDR datadatadatadata
        5 PNT datadatadatadata
        6 NGH datadatadatadata
    
        theseWithoutThose(theData, HDR, [PNT]) would not return line 1 or 4
        because a PNT line occurs before the next HDR or before the end of data.
        Only line 3 would be returned.
        
        theseWithoutThose(theData, PNT, [NGH]) would return line 2 but not 5.
        
    If the optional "butYesThem" is given, then also add the qualification that
    at least one line of at least one type in butYesThem must also occur before
    the next "thisType" line. For example given "theData" above:
        
        theseWithoutThose(theData, HDR, [PNT], [NGH]) would return no lines.
        
        theseWithoutThose(theData, HDR, [NGH], [PNT]) would return only line 1.
    
    If the optional "beforeThem" is given, then add the qualification that there
    also mustn't be any "notThose" lines before lines of the type(s) included in
    "beforeThem".  For example, given "theData" below (slightly different from above):
        1 HDR datadatadatadata
        2 PNT datadatadatadata
        3 HDR datadatadatadata
        4 NGH datadatadatadata
        5 PNT datadatadatadata
        6 NGH datadatadatadata
        7 HDR datadatadatadata
    
        theseWithoutThose(theData, 'PNT', ['NGH'], beforeThem = ['HDR']) would
        return line 2
        
        theseWithoutThose(theData, 'PNT', ['NGH']) would return
        nothing
        
        theseWithoutThose(theData, 'HDR', ['PNT'], ['NGH'],['PNT']) would return
        line 3 (it's essentially asking for HDR with NGH before any PNT)
    
    
    dataLines is a list of lists of strings, presumed to be all the data from a
    file, stripped and split. [0] in each list of strings is the "type" of data
    recorded in that line.
    
    thisType is a string, indicating which line types to check for. notThose
    is a list of strings indicating which line types can't follow thisType
    lines. butYesThem and beforeThem are also lists of strings; this function
    only reads them.
    
    thisType and any items in butYesThem cannot be in notThose.  When that
    rule is broken, the return value is a one-item list holding an
    "ERROR (...)" message string rather than data lines.
    
    Returns a list of lists of strings (copies of the qualifying lines).
    '''
    
    if thisType in notThose:
        return ['ERROR (' + thisType + '): Cannot exclude an item type from itself']
    for this in butYesThem:
        if this in notThose:
            return ['ERROR (' + this + '): Cannot require and forbid an item type']
    
    these = []
    maybeThis = []    # the candidate being tracked; empty when there is none
    yesFound = False  # set once a butYesThem line follows the candidate
    
    checkYes = len(butYesThem) > 0
    
    for line in dataLines:
        if maybeThis == []:
            # Nothing is being tracked, so only a thisType line matters.
            if isType(line, thisType):
                maybeThis = line[:]
                yesFound = False
        elif checkYes:
            # When checkYes, notThose can be in beforeThem, so check
            # beforeThem first.
            if line[0] in beforeThem:
                if yesFound: # Hooray!
                    these.append(maybeThis)
                maybeThis = []
                yesFound = False
            elif line[0] in notThose: # maybeThis is not "without those"
                maybeThis = []
                yesFound = False
            elif line[0] in butYesThem: # found a "butYesThem"
                yesFound = True
            elif isType(line, thisType):
                if yesFound: # Winner!
                    these.append(maybeThis)
                # Start tracking this line in place of the old candidate.
                # Leaving the old one in place would report it more than once
                # and would skip this line as a candidate altogether.
                maybeThis = line[:]
                yesFound = False
        else:
            if line[0] in notThose: # maybeThis is not "without those"
                maybeThis = []
            else:
                isNextCandidate = isType(line, thisType)
                if isNextCandidate or line[0] in beforeThem:
                    # maybeThis didn't have any of "notThose"
                    these.append(maybeThis)
                    maybeThis = line[:] if isNextCandidate else []
    
    # Reached the end of data with a candidate and none of "those" after it.
    # Add it, unless a butYesThem line was required and never seen.
    if maybeThis != [] and (yesFound or not checkYes):
        these.append(maybeThis)
    
    return these
Ejemplo n.º 39
0
def checkUniqueNeighbors(dataLines, sampleProtocols):
    '''
    Checks all the recorded neighbors for each point collected during
    sampleProtocols samples to make sure that the list of neighbors is unique.
    Ideally, only check juvenile samples because the adult female protocol
    allows for some redundancy.  Returns a list of lists of strings: the point
    lines that have non-unique neighbors, and all of the point's associated
    neighbor lines.  An empty list is placed between one reported point and
    the next.  Points are reported in sorted order of their tab-joined text.
    
    When considering uniqueness of neighbors, placeholder names (any names in
    constants.unknSnames) are ignored.  Neighbor names are read from [7].
    
    Neighbor lines with no preceding point line are gathered under the
    pseudo-point "(MISSING POINT LINE)".
    
    dataLines is a list of lists of strings, presumed to be all the data from a
    file, stripped and split. sampleProtocols is a list of strings; a focal's
    protocol is read from [6] of its header line.
    '''
    from constants import unknSnames, focalAbbrev, pntAbbrev, neighborAbbrev
    
    # Make placeholders for iteration.
    # Holds the last-read "header" line, but only for focals of the type(s)
    # allowed by sampleProtocols
    currentHeader = []
    # Holds the last-read "point" line, but only if it was in an allowed focal
    currentPoint = []
    
    # Dictionary of points and neighbors.
    #   Key: the point line--joined as a string
    #   Value: list of the neighbor lines (as lists of strings) for the point
    myPnts = {}
    
    # Key for dictionary when neighbors are missing a point line
    missingPntKey = '(MISSING POINT LINE)'
    myPnts[missingPntKey] = []
    
    for line in dataLines:
        if line[0] not in [focalAbbrev, pntAbbrev, neighborAbbrev]:
            # Then we don't care about it for this question
            continue
        elif isType(line, focalAbbrev):
            if line[6] in sampleProtocols: # This is a focal sample of interest
                currentHeader = line[:]
                currentPoint = []
            else: # We don't care about any of the data in this focal
                currentHeader = []
                currentPoint = []
                continue
        # All that's left are points and neighbors. If currentHeader is empty,
        # then we don't care about any of these.
        elif currentHeader == []:
            continue
        # Only points and neighbors that actually happened during focals of
        # interest are left
        elif isType(line, pntAbbrev):
            # This should always be true, but added here just in case
            if sameDate(line, currentHeader):
                currentPoint = line[:]
                myPnts['\t'.join(currentPoint)] = []
        elif isType(line, neighborAbbrev):
            if currentPoint == []: # Should only happen if the observer messed up somewhere else
                myPnts[missingPntKey].append(line)
            elif sameDate(line, currentPoint):
                myPnts['\t'.join(currentPoint)].append(line)
    
    # Get list of names that are allowed to be nonunique
    fakeNames = unknSnames.keys()
    
    # Make list to hold the point and neighbor lines with nonunique neighbors
    nonUniqueNeighbors = []
    
    # dict.iteritems() only exists on Python 2; items() works on both.
    # Sorting keeps the report order the same from run to run.
    for (point, neighbors) in sorted(myPnts.items()):
        # Collect all the real (non-placeholder) neighbor names into one list
        nghNames = [neighbor[7] for neighbor in neighbors
                    if neighbor[7] not in fakeNames]
        if len(nghNames) > len(set(nghNames)): # Then 1 or more neighbors is redundant
            if len(nonUniqueNeighbors) > 0:
                # For every instance after the first, add a separator first.
                # When this is output to file, a newline will be added.
                nonUniqueNeighbors.append([])
            nonUniqueNeighbors.append(point.split('\t'))
            nonUniqueNeighbors.extend(neighbors)

    return nonUniqueNeighbors
Ejemplo n.º 40
0
def errorAlertSummary(dataLines):
    '''
    Builds the "Alerts and Errors" section of a summary for the data in
    dataLines.

    dataLines is a list of list of strings, presumed to be all the data from a
    file, stripped and split.  Each list of strings should have a 'yyyy-mm-dd'
    date at [2], and a hh:mm:ss time at [3]. A code indicating the sample's
    "type" should be at [0].

    Every check below is run in order and its results are formatted by
    writeAlert.  Some of these are real errors; others may not be "wrong" but
    may indicate problems:
        -- Same focal collected more than once/day
        -- More than 1 group sampled in a single day
        -- Focals with overlapping times
        -- Focals with no data at all
        -- Focals with no points
        -- Focals with >10 points
        -- Points w/ no neighbors (exclude out of sight points)
        -- Neighbors w/o a preceding PNT
        -- Points w/ >3 neighbors
        -- Points from juvenile samples with non-unique neighbors
        -- Neighbors w/o an N0/N1/N2 code
        -- Notes on days w/o any focals
        -- Actor == Actee
        -- Actor or Actee is a non-sname placeholder (NULL, XXX, 998, etc.)
        -- Neighbor is a non-sname placeholder ('IMM', 'INF')
        -- Notes lines possibly containing mounts, ejaculations, or consorts
        -- Non-note lines that recorded mounts, ejaculations, or consorts
        -- Mounts/Ejaculations/Consorts not during a focal
        -- Mounts/Ejaculations/Consorts not involving the focal individual

        Not implemented, but maybe worth adding:
        -- JM's AS/OS/DSing AF's
        -- Actor/actee in different groups

    Returns a single string that will include several line breaks: the
    section header followed by one writeAlert result per check.
    '''
    from constants import (focalAbbrev, pntAbbrev, neighborAbbrev, noteAbbrev,
                           adlibAbbrev, outOfSightValue, stypeJuv, p8_nghcodes,
                           bb_mount, bb_ejaculation, bb_consort, bb_mount_long,
                           bb_ejaculation_long, bb_consort_long,
                           bb_consort_long2)

    # The section header is always the first entry of the report
    report = ['------Alerts and Errors:\n']

    def tabbed(rows):
        # Turn each list of strings back into a single tab-delimited string
        return ['\t'.join(fields) for fields in rows]

    def addAlert(label, items):
        # Format one alert and queue it, followed by a blank separator line
        report.append(writeAlert(label, items) + '\n')

    def describeFocal(focal):
        # Short form of a focal header: date, time, ID
        return ' '.join([focal[2], focal[3], focal[5]])

    # Individuals sampled >1x/day
    addAlert('duplicate (date, sname) pairs',
             tabbed(checkDuplicateFocals(dataLines)))

    # >1 group sampled in one day
    addAlert('>1 group sampled in a day',
             tabbed(checkDuplicateGroups(dataLines)))

    # Overlapping focals, reported as "focal, focal" pairs
    overlapPairs = checkFocalOverlaps(dataLines)
    overlapText = [', '.join([describeFocal(first), describeFocal(second)])
                   for (first, second) in overlapPairs]
    addAlert('overlapping focals', overlapText)

    # Focal samples with no data of any kind
    anyDataTypes = [pntAbbrev, neighborAbbrev, noteAbbrev, adlibAbbrev]
    emptyFocals = theseWithoutThose(dataLines, focalAbbrev, anyDataTypes)
    addAlert('focal samples with no data', tabbed(emptyFocals))

    # Focal samples with no points
    pointlessFocals = theseWithoutThose(dataLines, focalAbbrev, [pntAbbrev])
    addAlert('focal samples without points', tabbed(pointlessFocals))

    # Focal samples with >10 points
    crowdedFocals = []
    for (focal, count) in countPointsPerFocal(dataLines):
        if count > 10:
            crowdedFocals.append('\t'.join(focal) + '; ' + str(count) + ' points')
    addAlert('focal samples with > 10 points', crowdedFocals)

    # In-sight points with no neighbors (out-of-sight points are excused)
    lonePoints = theseWithoutThose(dataLines, pntAbbrev, [neighborAbbrev], beforeThem = [focalAbbrev])
    inSightLonePoints = [pnt for pnt in lonePoints if pnt[6] != outOfSightValue]
    addAlert('in-sight points w/o neighbors', tabbed(inSightLonePoints))

    # Neighbors without a preceding point come about in two different ways:
    # neighbor lines recorded right after a focal starts and before any
    # points, or a point that is followed by >3 neighbors.

    # Way one: neighbors directly after a focal header
    headerThenNeighbor = theseWithoutThose(dataLines, focalAbbrev, [pntAbbrev], [neighborAbbrev], [pntAbbrev])
    headerThenNeighbor = tabbed(headerThenNeighbor)
    addAlert('header-then-neighbor, with no ' + pntAbbrev, headerThenNeighbor)

    # Way two: points with >3 neighbors
    overfullPoints = []
    for entry in checkNeighborsPerPoint(dataLines):
        if entry[1] > 3:
            overfullPoints.append(entry[0])
    addAlert('points with >3 neighbors', overfullPoints)

    # Non-unique neighbors in juvenile samples
    addAlert('non-unique neighbors in juvenile samples',
             tabbed(checkUniqueNeighbors(dataLines, [stypeJuv])))

    # Neighbor lines whose last field is not a recognized neighbor code
    uncodedNeighbors = []
    for row in dataLines:
        if not isType(row, neighborAbbrev):
            continue
        if row[-1] in p8_nghcodes:
            continue
        uncodedNeighbors.append(row)
    addAlert('neighbors w/o neighbor codes', tabbed(uncodedNeighbors))

    # Notes on days without any focals
    addAlert('notes on days without any focals',
             tabbed(checkNotesNoFocals(dataLines)))

    # Actor is actee, or focal is neighbor
    addAlert('lines where actor is actee, or focal is neighbor',
             tabbed(checkActorIsActee(dataLines)))

    # Actor or actee is a non-sname placeholder
    addAlert('lines where actor or actee is a non-sname placeholder',
             tabbed(checkActorActeeNotReal(dataLines)))

    # Neighbor is a non-sname placeholder (different placeholders from ad-libs)
    addAlert('lines where neighbor is a non-sname placeholder',
             tabbed(checkNeighborNotReal(dataLines)))

    # Every code/spelling that counts as a mount, ejaculation, or consort
    MEC_list = [bb_mount, bb_ejaculation, bb_consort, bb_mount_long, bb_ejaculation_long, bb_consort_long, bb_consort_long2]

    # Notes that appear to contain mounts, ejaculations, or consorts
    addAlert('notes that appear to contain mounts, ejaculations, or consorts',
             tabbed(checkBehavsInNotes(dataLines, MEC_list)))

    # Mounts, ejaculations, or consorts recorded as regular, legit behaviors
    addAlert('lines with mounts, ejaculations, or consorts recorded as regular, legit behaviors',
             tabbed(checkSpecificBehavior(dataLines, MEC_list)))

    # Mounts, ejaculations, or consorts recorded outside of a focal sample
    addAlert('lines with mounts, ejaculations, or consorts recorded outside of a focal sample',
             tabbed(checkMountsConsortsDuringFocal(dataLines)))

    # Mounts, ejaculations, or consorts not involving the focal individual
    addAlert('lines with mounts, ejaculations, or consorts not involving the focal individual',
             tabbed(checkMountsConsortsInvolvedFocal(dataLines)))

    return '\n'.join(report)